From 731538691d49544b92c78bdc6d9547026eba2419 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 16 Jul 2024 10:58:40 +0000 Subject: [PATCH 01/11] Factor out preprocessing logic to nf-core subworkflow --- main.nf | 1 + modules.json | 15 +- subworkflows/local/prepare_genome/main.nf | 7 +- .../utils_nfcore_rnaseq_pipeline/main.nf | 58 +--- .../main.nf | 320 ++++++++++++++++++ .../meta.yml | 132 ++++++++ .../tests/main.nf.test | 135 ++++++++ .../tests/main.nf.test.snap | 112 ++++++ .../tests/nextflow.config | 27 ++ .../tests/tags.yml | 2 + workflows/rnaseq/main.nf | 225 ++---------- 11 files changed, 779 insertions(+), 255 deletions(-) create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/nextflow.config create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/tags.yml diff --git a/main.nf b/main.nf index d3de0ae0b..152c320e4 100755 --- a/main.nf +++ b/main.nf @@ -117,6 +117,7 @@ workflow NFCORE_RNASEQ { PREPARE_GENOME.out.salmon_index, PREPARE_GENOME.out.kallisto_index, PREPARE_GENOME.out.bbsplit_index, + PREPARE_GENOME.out.ribo_db, PREPARE_GENOME.out.sortmerna_index, PREPARE_GENOME.out.splicesites, !params.remove_ribo_rna && params.remove_ribo_rna diff --git a/modules.json b/modules.json index b0125259a..346597902 100644 --- a/modules.json +++ b/modules.json @@ -8,7 +8,7 @@ "bbmap/bbsplit": { "branch": "master", "git_sha": "2c6b1144ed58b6184ad58fc4e6b6a90219b4bf4f", - "installed_by": ["modules"] + "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"] }, "bedtools/genomecov": { "branch": "master", @@ -18,7 +18,7 @@ "cat/fastq": { 
"branch": "master", "git_sha": "4fc983ad0b30e6e32696fa7d980c76c7bfe1c03e", - "installed_by": ["modules"] + "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"] }, "custom/catadditionalfasta": { "branch": "master", @@ -202,7 +202,7 @@ "sortmerna": { "branch": "master", "git_sha": "df05c8db5195867c0bc7b92c1788115b66f0d17d", - "installed_by": ["modules"] + "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"] }, "star/align": { "branch": "master", @@ -315,17 +315,22 @@ "fastq_fastqc_umitools_fastp": { "branch": "master", "git_sha": "db35d26edeafacf9906a517827df621a29adc13d", - "installed_by": ["subworkflows"] + "installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"] }, "fastq_fastqc_umitools_trimgalore": { "branch": "master", "git_sha": "cb6defa0834eda9d6d3f967e981c819fc3e257bf", + "installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"] + }, + "fastq_qc_trim_filter_setstrandedness": { + "branch": "master", + "git_sha": "81ed776ec05bf9ae89da3442c29b5c5c7afa807b", "installed_by": ["subworkflows"] }, "fastq_subsample_fq_salmon": { "branch": "master", "git_sha": "727232afb8294b53dd9d05bfe469b70cce1675bb", - "installed_by": ["subworkflows"] + "installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"] }, "quantify_pseudo_alignment": { "branch": "master", diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 1e4ee46fd..050395442 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -228,7 +228,11 @@ workflow PREPARE_GENOME { // Uncompress sortmerna index or generate from scratch if required // ch_sortmerna_index = Channel.empty() + ch_ribo_db = Channel.empty() + if ('sortmerna' in prepare_tool_indices) { + ch_ribo_db = file(sortmerna_fasta_list) + if (sortmerna_index) { if (sortmerna_index.endsWith('.tar.gz')) { ch_sortmerna_index = UNTAR_SORTMERNA_INDEX ( [ [:], sortmerna_index ] ).untar.map { 
it[1] } @@ -237,7 +241,7 @@ workflow PREPARE_GENOME { ch_sortmerna_index = Channel.value(file(sortmerna_index)) } } else { - ch_sortmerna_fastas = Channel.from(file(sortmerna_fasta_list).readLines()) + ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()) .map { row -> file(row, checkIfExists: true) } .collect() .map { [ 'rrna_refs', it ] } @@ -370,6 +374,7 @@ workflow PREPARE_GENOME { chrom_sizes = ch_chrom_sizes // channel: path(genome.sizes) splicesites = ch_splicesites // channel: path(genome.splicesites.txt) bbsplit_index = ch_bbsplit_index // channel: path(bbsplit/index/) + ribo_db = ch_ribo_db // channel: path(sortmerna_fasta_list) sortmerna_index = ch_sortmerna_index // channel: path(sortmerna/index/) star_index = ch_star_index // channel: path(star/index/) rsem_index = ch_rsem_index // channel: path(rsem/index/) diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf index fe080ce13..8022a5c1b 100644 --- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf @@ -20,6 +20,7 @@ include { imNotification } from '../../nf-core/utils_nfcore_pipeline' include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' include { logColours } from '../../nf-core/utils_nfcore_pipeline' +include { calculateStrandedness } from '../../nf-core/fastq_qc_trim_filter_setstrandedness' /* ======================================================================================== @@ -548,63 +549,6 @@ def biotypeInGtf(gtf_file, biotype) { } } -// -// Function to determine library type by comparing type counts. 
Consistent -// between Salmon and RSeQC -// -def calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold=0.8, unstranded_threshold=0.1) { - def totalFragments = forwardFragments + reverseFragments + unstrandedFragments - def totalStrandedFragments = forwardFragments + reverseFragments - - def library_strandedness = 'undetermined' - if (totalStrandedFragments > 0) { - def forwardProportion = forwardFragments / (totalStrandedFragments as double) - def reverseProportion = reverseFragments / (totalStrandedFragments as double) - def proportionDifference = Math.abs(forwardProportion - reverseProportion) - - if (forwardProportion >= stranded_threshold) { - strandedness = 'forward' - } else if (reverseProportion >= stranded_threshold) { - strandedness = 'reverse' - } else if (proportionDifference <= unstranded_threshold) { - strandedness = 'unstranded' - } - } - - return [ - inferred_strandedness: strandedness, - forwardFragments: (forwardFragments / (totalFragments as double)) * 100, - reverseFragments: (reverseFragments / (totalFragments as double)) * 100, - unstrandedFragments: (unstrandedFragments / (totalFragments as double)) * 100 - ] -} - -// -// Function that parses Salmon quant 'lib_format_counts.json' output file to get inferred strandedness -// -def getSalmonInferredStrandedness(json_file, stranded_threshold = 0.8, unstranded_threshold = 0.1) { - // Parse the JSON content of the file - def libCounts = new JsonSlurper().parseText(json_file.text) - - // Calculate the counts for forward and reverse strand fragments - def forwardKeys = ['SF', 'ISF', 'MSF', 'OSF'] - def reverseKeys = ['SR', 'ISR', 'MSR', 'OSR'] - - // Calculate unstranded fragments (IU and U) - // NOTE: this is here for completeness, but actually all fragments have a - // strandedness (even if the overall library does not), so all these values - // will be '0'. 
See - // https://groups.google.com/g/sailfish-users/c/yxzBDv6NB6I - def unstrandedKeys = ['IU', 'U', 'MU'] - - def forwardFragments = forwardKeys.collect { libCounts[it] ?: 0 }.sum() - def reverseFragments = reverseKeys.collect { libCounts[it] ?: 0 }.sum() - def unstrandedFragments = unstrandedKeys.collect { libCounts[it] ?: 0 }.sum() - - // Use shared calculation function to determine strandedness - return calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold, unstranded_threshold) -} - // // Function that parses RSeQC infer_experiment output file to get inferred strandedness // diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf new file mode 100644 index 000000000..a17fc0b2b --- /dev/null +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf @@ -0,0 +1,320 @@ +import groovy.json.JsonSlurper + +include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit' +include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main' +include { SORTMERNA } from '../../../modules/nf-core/sortmerna/main' +include { SORTMERNA as SORTMERNA_INDEX } from '../../../modules/nf-core/sortmerna/main' + +include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../fastq_subsample_fq_salmon' +include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../fastq_fastqc_umitools_trimgalore' +include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../fastq_fastqc_umitools_fastp' + +def pass_trimmed_reads = [:] + +// +// Function to determine library type by comparing type counts. 
+// + +// +def calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold=0.8, unstranded_threshold=0.1) { + def totalFragments = forwardFragments + reverseFragments + unstrandedFragments + def totalStrandedFragments = forwardFragments + reverseFragments + + def strandedness = 'undetermined' // local default, returned unchanged when no threshold below is met + if (totalStrandedFragments > 0) { + def forwardProportion = forwardFragments / (totalStrandedFragments as double) + def reverseProportion = reverseFragments / (totalStrandedFragments as double) + def proportionDifference = Math.abs(forwardProportion - reverseProportion) + + if (forwardProportion >= stranded_threshold) { + strandedness = 'forward' + } else if (reverseProportion >= stranded_threshold) { + strandedness = 'reverse' + } else if (proportionDifference <= unstranded_threshold) { + strandedness = 'unstranded' + } + } + + return [ + inferred_strandedness: strandedness, + forwardFragments: (forwardFragments / (totalFragments as double)) * 100, + reverseFragments: (reverseFragments / (totalFragments as double)) * 100, + unstrandedFragments: (unstrandedFragments / (totalFragments as double)) * 100 + ] +} + +// +// Function that parses Salmon quant 'lib_format_counts.json' output file to get inferred strandedness +// +def getSalmonInferredStrandedness(json_file, stranded_threshold = 0.8, unstranded_threshold = 0.1) { + // Parse the JSON content of the file + def libCounts = new JsonSlurper().parseText(json_file.text) + + // Calculate the counts for forward and reverse strand fragments + def forwardKeys = ['SF', 'ISF', 'MSF', 'OSF'] + def reverseKeys = ['SR', 'ISR', 'MSR', 'OSR'] + + // Calculate unstranded fragments (IU and U) + // NOTE: this is here for completeness, but actually all fragments have a + // strandedness (even if the overall library does not), so all these values + // will be '0'.
See + // https://groups.google.com/g/sailfish-users/c/yxzBDv6NB6I + def unstrandedKeys = ['IU', 'U', 'MU'] + + def forwardFragments = forwardKeys.collect { libCounts[it] ?: 0 }.sum() + def reverseFragments = reverseKeys.collect { libCounts[it] ?: 0 }.sum() + def unstrandedFragments = unstrandedKeys.collect { libCounts[it] ?: 0 }.sum() + + // Use shared calculation function to determine strandedness + return calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold, unstranded_threshold) +} + +// +// Create MultiQC tsv custom content from a list of values +// +public static String multiqcTsvFromList(tsv_data, header) { + def tsv_string = "" + if (tsv_data.size() > 0) { + tsv_string += "${header.join('\t')}\n" + tsv_string += tsv_data.join('\n') + } + return tsv_string +} + +workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS { + + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_fasta // channel: /path/to/genome.fasta + ch_transcript_fasta // channel: /path/to/transcript.fasta + ch_gtf // channel: /path/to/genome.gtf + ch_salmon_index // channel: /path/to/salmon/index/ (optional) + ch_sortmerna_index // channel: /path/to/sortmerna/index/ (optional) + ch_bbsplit_index // channel: /path/to/bbsplit/index/ (optional) + ch_ribo_db // channel: /path/to/ Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. (optional) + skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads. 
+ skip_fastqc // boolean: true/false + skip_trimming // boolean: true/false + skip_umi_extract // boolean: true/false + make_salmon_index // boolean: Whether to create salmon index before running salmon quant + make_sortmerna_index // boolean: Whether to create a sortmerna index before running sortmerna + trimmer // string (enum): 'fastp' or 'trimgalore' + min_trimmed_reads // integer: > 0 + save_trimmed // boolean: true/false + remove_ribo_rna // boolean: true/false: whether to run sortmerna to remove rrnas + with_umi // boolean: true/false: Enable UMI-based read deduplication. + umi_discard_read // integer: 0, 1 or 2 + stranded_threshold // float: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. Must be at least 0.5 + unstranded_threshold // float: The difference in fraction of stranded reads assigned to 'forward' and 'reverse' below which a sample is classified as 'unstranded' + + main: + + ch_versions = Channel.empty() + ch_filtered_reads = Channel.empty() + ch_trim_read_count = Channel.empty() + ch_multiqc_files = Channel.empty() + + ch_reads + .branch { + meta, fastqs -> + single : fastqs.size() == 1 + return [ meta, fastqs.flatten() ] + multiple: fastqs.size() > 1 + return [ meta, fastqs.flatten() ] + } + .set { ch_fastq } + + // + // MODULE: Concatenate FastQ files from same sample if required + // + CAT_FASTQ ( + ch_fastq.multiple + ) + .reads + .mix(ch_fastq.single) + .set { ch_filtered_reads } + + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null)) + + // + // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore! 
+ // + if (trimmer == 'trimgalore') { + FASTQ_FASTQC_UMITOOLS_TRIMGALORE ( + ch_filtered_reads, + skip_fastqc, + with_umi, + skip_umi_extract, + skip_trimming, + umi_discard_read, + min_trimmed_reads + ) + ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads + ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count + + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) + ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip + .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip) + .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log) + .mix(ch_multiqc_files) + } + + // + // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp + // + if (trimmer == 'fastp') { + FASTQ_FASTQC_UMITOOLS_FASTP ( + ch_filtered_reads, + skip_fastqc, + with_umi, + skip_umi_extract, + umi_discard_read, + skip_trimming, + [], + save_trimmed, + save_trimmed, + min_trimmed_reads + ) + ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count + + ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) + ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip + .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip) + .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.map{tuple(it[0], [it[1]])}) + .mix(ch_multiqc_files) + } + + // + // Get list of samples that failed trimming threshold for MultiQC report + // + + ch_trim_read_count + .map { + meta, num_reads -> + pass_trimmed_reads[meta.id] = true + if (num_reads <= min_trimmed_reads.toFloat()) { + pass_trimmed_reads[meta.id] = false + return [ "$meta.id\t$num_reads" ] + } + } + .collect() + .map { + tsv_data -> + def header = ["Sample", "Reads after trimming"] + multiqcTsvFromList(tsv_data, header) + } + .set { ch_fail_trimming_multiqc } + + ch_multiqc_files = ch_multiqc_files + .mix( + ch_fail_trimming_multiqc.collectFile(name: 'fail_trimmed_samples_mqc.tsv') + ) + + // + // MODULE: 
Remove genome contaminant reads + // + if (!skip_bbsplit) { + BBMAP_BBSPLIT ( + ch_filtered_reads, + ch_bbsplit_index, + [], + [ [], [] ], + false + ) + + BBMAP_BBSPLIT.out.primary_fastq + .set { ch_filtered_reads } + + ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first()) + } + + // + // MODULE: Remove ribosomal RNA reads + // + if (remove_ribo_rna) { + ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()) + .map { row -> file(row, checkIfExists: true) } + .collect() + .map{ ['rrna_refs', it] } + + if (make_sortmerna_index) { + SORTMERNA_INDEX ( + [[],[]], + ch_sortmerna_fastas, + [[],[]] + ) + ch_sortmerna_index = SORTMERNA_INDEX.out.index.first() + } + + SORTMERNA ( + ch_filtered_reads, + ch_sortmerna_fastas, + ch_sortmerna_index + ) + + SORTMERNA.out.reads + .set { ch_filtered_reads } + + ch_multiqc_files = ch_multiqc_files + .mix(SORTMERNA.out.log) + + ch_versions = ch_versions.mix(SORTMERNA.out.versions.first()) + } + + // Branch FastQ channels if 'auto' specified to infer strandedness + ch_filtered_reads + .branch { + meta, fastq -> + auto_strand : meta.strandedness == 'auto' + return [ meta, fastq ] + known_strand: meta.strandedness != 'auto' + return [ meta, fastq ] + } + .set { ch_strand_fastq } + + // + // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness + // + // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created + + ch_fasta + .combine(ch_strand_fastq.auto_strand) + .map { it.first() } + .first() + .set { ch_genome_fasta } + + FASTQ_SUBSAMPLE_FQ_SALMON ( + ch_strand_fastq.auto_strand, + ch_genome_fasta, + ch_transcript_fasta, + ch_gtf, + ch_salmon_index, + make_salmon_index + ) + ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions) + + FASTQ_SUBSAMPLE_FQ_SALMON + .out + .lib_format_counts + .join(ch_strand_fastq.auto_strand) + .map { + meta, json, reads -> + def salmon_strand_analysis = getSalmonInferredStrandedness(json, 
stranded_threshold, unstranded_threshold) + def strandedness = salmon_strand_analysis.inferred_strandedness // declared locally instead of assigned as an undeclared variable + if (strandedness == 'undetermined') { + strandedness = 'unstranded' + } + return [ meta + [ strandedness: strandedness, salmon_strand_analysis: salmon_strand_analysis ], reads ] + } + .mix(ch_strand_fastq.known_strand) + .set { ch_strand_inferred_fastq } + + emit: + + reads = ch_strand_inferred_fastq + trim_read_count = ch_trim_read_count + + multiqc_files = ch_multiqc_files.transpose().map{it[1]} + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml new file mode 100644 index 000000000..cf7cd885f --- /dev/null +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml @@ -0,0 +1,132 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_qc_trim_filter_setstrandedness" +description: Basic FASTQ preprocessing for RNA-seq +keywords: + - fastq + - rnaseq + - rrna + - trimming + - subsample + - strandedness +components: + - bbmap/bbsplit + - samtools/sort + - samtools/index + - cat + - cat/fastq + - sortmerna + - fastq_subsample_fq_salmon + - fastq_fastqc_umitools_trimgalore + - fastq_fastqc_umitools_fastp +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ch_reads: + type: file + description: | + Channel with input FastQ files of size 1 and 2 for single-end and + paired-end data, respectively.
+ - ch_fasta: + type: file + description: Channel with genome sequence in fasta format + - ch_transcript_fasta: + type: file + description: Channel with transcriptome sequence in fasta format + - ch_gtf: + type: file + description: Channel with features in GTF format + - ch_salmon_index: + type: file + description: Directory containing Salmon index + - ch_sortmerna_index: + type: file + description: Directory containing sortmerna index + - ch_bbsplit_index: + type: file + description: Path to directory or tar.gz archive for pre-built BBSplit index + - ch_ribo_db: + type: file + description: | + Channel with text file containing paths to fasta files (one per line) + that will be used to create the database for SortMeRNA + - skip_bbsplit: + type: boolean + description: Whether to skip BBSplit for removal of non-reference genome reads + - skip_fastqc: + type: boolean + description: Whether to skip FastQC + - skip_trimming: + type: boolean + description: Whether to skip trimming + - skip_umi_extract: + type: boolean + description: | + Skip the UMI extraction from the read in case the UMIs have been moved + to the headers in advance of the pipeline run + - make_salmon_index: + type: boolean + description: Whether to create salmon index before running salmon quant + - make_sortmerna_index: + type: boolean + description: Whether to create sortmerna index before running sortmerna + - trimmer: + type: string + description: | + Specifies the trimming tool to use - available options are 'trimgalore' + and 'fastp' + - min_trimmed_reads: + type: integer + description: | + Minimum number of trimmed reads below which samples are removed from + further processing + - save_trimmed: + type: boolean + description: Save the trimmed FastQ files in the results directory? + - remove_ribo_rna: + type: boolean + description: Enable the removal of reads derived from ribosomal RNA using SortMeRNA? 
+ - with_umi: + type: boolean + description: Enable UMI-based read deduplication + - umi_discard_read: + type: integer + description: | + After UMI barcode extraction discard either R1 or R2 by setting this + parameter to 1 or 2, respectively + - stranded_threshold: + type: float + min: 0.5 + description: | + The fraction of stranded reads that must be assigned to a strandedness + for confident assignment. Must be at least 0.5. + - unstranded_threshold: + type: float + description: | + The difference in fraction of stranded reads assigned to 'forward' and + 'reverse' below which a sample is classified as 'unstranded'. + +output: + - reads: + type: file + description: Preprocessed fastq reads + pattern: "*.{fq,fastq}{,.gz}" + - multiqc_files: + type: file + description: MultiQC-compatible output files from tools used in preprocessing + pattern: "*" + - trim_read_count: + type: integer + description: Number of reads remaining after trimming for all input samples + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@pinin4fjords" +maintainers: + - "@pinin4fjords" diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test new file mode 100644 index 000000000..b58ff098e --- /dev/null +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test @@ -0,0 +1,135 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS" + script "../main.nf" + workflow "FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS" + config "./nextflow.config" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_qc_trim_filter_setstrandedness" + + tag "bbmap/bbsplit" + tag "cat" + tag "cat/fastq" + tag "fastqc" + tag "sortmerna" + tag "subworkflows/fastq_fastqc_umitools_trimgalore" + tag "subworkflows/fastq_fastqc_umitools_fastp"
+ tag "subworkflows/fastq_subsample_fq_salmon" + + + + test("homo_sapiens paired-end [fastq] fastp") { + + when { + workflow { + """ + ch_reads = Channel.of([ + [ id:'test', single_end:false, strandedness:'auto' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + + ch_ribo_db = file('ribo_db.txt') + ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta') + + input[0] = ch_reads + input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta + input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta + input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf + input[4] = [] // ch_salmon_index + input[5] = [] // ch_sortmerna_index + input[6] = [] // ch_bbsplit_index + input[7] = ch_ribo_db // ch_ribo_db + input[8] = true // skip_bbsplit + input[9] = false // skip_fastqc + input[10] = false // skip_trimming + input[11] = true // skip_umi_extract + input[12] = true // make_salmon_index + input[13] = true // make_sortmerna_index + input[14] = 'fastp' // trimmer + input[15] = 10 // min_trimmed_reads + input[16] = true // save_trimmed + input[17] = true // remove_ribo_rna + input[18] = false // with_umi + input[19] = 0 // umi_discard_read + """ + } + } + + then { + def pelines1 = path(workflow.out.reads[0][1][0]).linesGzip + def pelines2 = path(workflow.out.reads[0][1][1]).linesGzip + assertAll( + { assert workflow.success}, + { assert snapshot(pelines1).md5().match("fastp_test_pe_reads_1_lines") }, + { assert 
snapshot(pelines1.size()).match("fastp_test_pe_reads_1_size") }, + { assert snapshot(pelines2).md5().match("fastp_test_pe_reads_2_lines") }, + { assert snapshot(pelines2.size()).match("fastp_test_pe_reads_2_size") }, + { assert snapshot(workflow.out.trim_read_count).match("fastp_read_count") } + // This doesn't work- 'cat' changes between Conda and Docker - + // leaving it here until we find a way to address that + // { assert snapshot(workflow.out.versions).match("fastp_versions") } + ) + } + } + test("homo_sapiens paired-end [fastq] trimgalore") { + + when { + workflow { + """ + ch_reads = Channel.of([ + [ id:'test', single_end:false, strandedness:'auto' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + + ch_ribo_db = file('ribo_db.txt') + ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta') + + input[0] = ch_reads + input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta + input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta + input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf + input[4] = [] // ch_salmon_index + input[5] = [] // ch_sortmerna_index + input[6] = [] // ch_bbsplit_index + input[7] = ch_ribo_db // ch_ribo_db + input[8] = true // skip_bbsplit + input[9] = false // skip_fastqc + input[10] = false // skip_trimming + input[11] = true // skip_umi_extract + input[12] = true // make_salmon_index + input[13] = true // make_sortmerna_index + input[14] = 'fastp' // trimmer + 
input[15] = 10 // min_trimmed_reads + input[16] = true // save_trimmed + input[17] = true // remove_ribo_rna + input[18] = false // with_umi + input[19] = 0 // umi_discard_read + """ + } + } + + then { + def pelines1 = path(workflow.out.reads[0][1][0]).linesGzip + def pelines2 = path(workflow.out.reads[0][1][1]).linesGzip + assertAll( + { assert workflow.success}, + { assert snapshot(pelines1).md5().match("trimgalore_test_pe_reads_1_lines") }, + { assert snapshot(pelines1.size()).match("trimgalore_test_pe_reads_1_size") }, + { assert snapshot(pelines2).md5().match("trimgalore_test_pe_reads_2_lines") }, + { assert snapshot(pelines2.size()).match("trimgalore_test_pe_reads_2_size") }, + { assert snapshot(workflow.out.trim_read_count).match("trimgalore_read_count") } + // This doesn't work- 'cat' changes between Conda and Docker - + // leaving it here until we find a way to address that + //{ assert snapshot(workflow.out.versions).match("trimgalore_versions") } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap new file mode 100644 index 000000000..21bdb2492 --- /dev/null +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap @@ -0,0 +1,112 @@ +{ + "fastp_read_count": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false, + "strandedness": "auto" + }, + 3022 + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-24T16:19:13.057802" + }, + "trimgalore_test_pe_reads_2_lines": { + "content": "eccf3e9e74589ff01c77fce7f4548e41", + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-24T17:44:07.667653" + }, + "fastp_test_pe_reads_1_size": { + "content": [ + 4508 + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-24T17:43:46.173892" + }, + "trimgalore_test_pe_reads_1_size": 
{ + "content": [ + 4508 + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-24T17:44:07.642318" + }, + "trimgalore_test_pe_reads_1_lines": { + "content": "3868fc1caf09367141d2bbf47e158823", + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-24T17:44:07.641186" + }, + "fastp_test_pe_reads_2_lines": { + "content": "eccf3e9e74589ff01c77fce7f4548e41", + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-24T17:43:46.235022" + }, + "fastp_test_pe_reads_2_size": { + "content": [ + 4508 + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-24T17:43:46.242006" + }, + "trimgalore_test_pe_reads_2_size": { + "content": [ + 4508 + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-24T17:44:07.668644" + }, + "fastp_test_pe_reads_1_lines": { + "content": "3868fc1caf09367141d2bbf47e158823", + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-24T17:43:46.161535" + }, + "trimgalore_read_count": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false, + "strandedness": "auto" + }, + 3022 + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-24T17:44:07.669435" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/nextflow.config b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/nextflow.config new file mode 100644 index 000000000..9e33e4b33 --- /dev/null +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/nextflow.config @@ -0,0 +1,27 @@ +// +// Read subsampling and strand inferring options +// + +process { + withName: 'FQ_SUBSAMPLE' { + ext.args = '--record-count 1000000 --seed 1' + ext.prefix = { "${meta.id}.subsampled" } + publishDir = [ + enabled: false + ] + } + + withName: 
'.*:FASTQ_SUBSAMPLE_FQ_SALMON:SALMON_QUANT' { + ext.args = '--skipQuant --minAssignedFrags 1' + publishDir = [ + enabled: false + ] + } + withName: 'SORTMERNA' { + ext.args = '--index 0' + } + withName: 'SORTMERNA_INDEX' { + ext.args = '--index 1' + } +} + diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/tags.yml b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/tags.yml new file mode 100644 index 000000000..cafd4a33d --- /dev/null +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_qc_trim_filter_setstrandedness: + - subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/** diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index 30eac1986..f4075b1c3 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -19,7 +19,6 @@ include { ALIGN_STAR } from '../../subworkflows/local/align_star' include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem' include { checkSamplesAfterGrouping } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' include { multiqcTsvFromList } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' -include { getSalmonInferredStrandedness } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' include { getStarPercentMapped } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' include { biotypeInGtf } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' include { getInferexperimentStrandedness } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline' @@ -68,6 +67,7 @@ include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE } from '../../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig' include { QUANTIFY_PSEUDO_ALIGNMENT as QUANTIFY_STAR_SALMON } from '../../subworkflows/nf-core/quantify_pseudo_alignment' include { QUANTIFY_PSEUDO_ALIGNMENT } from
'../../subworkflows/nf-core/quantify_pseudo_alignment' +include { FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS } from '../../subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -98,6 +98,7 @@ workflow RNASEQ { ch_salmon_index // channel: path(salmon/index/) ch_kallisto_index // channel: [ meta, path(kallisto/index/) ] ch_bbsplit_index // channel: path(bbsplit/index/) + ch_ribo_db // channel: path(sortmerna_fasta_list) ch_sortmerna_index // channel: path(sortmerna/index/) ch_splicesites // channel: path(genome.splicesites.txt) make_sortmerna_index // boolean: Whether to create an index before running sortmerna @@ -126,203 +127,40 @@ workflow RNASEQ { .map { checkSamplesAfterGrouping(it) } - .branch { - meta, fastqs -> - single : fastqs.size() == 1 - return [ meta, fastqs.flatten() ] - multiple: fastqs.size() > 1 - return [ meta, fastqs.flatten() ] - } - .set { ch_fastq } - - // - // MODULE: Concatenate FastQ files from same sample if required - // - CAT_FASTQ ( - ch_fastq.multiple - ) - .reads - .mix(ch_fastq.single) - .set { ch_cat_fastq } - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) - - // - // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore! 
- // - ch_filtered_reads = Channel.empty() - ch_trim_read_count = Channel.empty() - if (params.trimmer == 'trimgalore') { - FASTQ_FASTQC_UMITOOLS_TRIMGALORE ( - ch_cat_fastq, - params.skip_fastqc || params.skip_qc, - params.with_umi, - params.skip_umi_extract, - params.skip_trimming, - params.umi_discard_read, - params.min_trimmed_reads - ) - ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads - ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]}) // FastQC report - untrimmed - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.umi_log.collect{it[1]}) // umi-tools extract logs - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip.collect{it[1]}) // FastQC report - trimmed - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log.collect{it[1]}) // Trim Galore! trimming report - ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) - } + .set{ ch_fastq } // - // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp + // Run RNA-seq FASTQ preprocessing subworkflow // - if (params.trimmer == 'fastp') { - FASTQ_FASTQC_UMITOOLS_FASTP ( - ch_cat_fastq, - params.skip_fastqc || params.skip_qc, - params.with_umi, - params.skip_umi_extract, - params.umi_discard_read, - params.skip_trimming, - [], - params.save_trimmed, - params.save_trimmed, - params.min_trimmed_reads - ) - ch_filtered_reads = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads - ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip.collect{it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.collect{it[1]}) - 
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.umi_log.collect{it[1]}) // umi-tools extract logs - ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) - } - - // Save trim status for workflow summary - ch_trim_status = ch_trim_read_count - .map { - meta, num_reads -> - return [ meta.id, num_reads > params.min_trimmed_reads.toFloat() ] - } - // - // Get list of samples that failed trimming threshold for MultiQC report - // - ch_trim_read_count - .map { - meta, num_reads -> - if (num_reads <= params.min_trimmed_reads.toFloat()) { - return [ "$meta.id\t$num_reads" ] - } - } - .collect() - .map { - tsv_data -> - def header = ["Sample", "Reads after trimming"] - multiqcTsvFromList(tsv_data, header) - } - .set { ch_fail_trimming_multiqc } - ch_multiqc_files = ch_multiqc_files.mix(ch_fail_trimming_multiqc.collectFile(name: 'fail_trimmed_samples_mqc.tsv')) - - // - // MODULE: Remove genome contaminant reads - // - if (!params.skip_bbsplit) { - BBMAP_BBSPLIT ( - ch_filtered_reads, - ch_bbsplit_index, - [], - [ [], [] ], - false - ) - .primary_fastq - .set { ch_filtered_reads } - ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first()) - } - - // - // MODULE: Remove ribosomal RNA reads - // - // Check rRNA databases for sortmerna - if (params.remove_ribo_rna) { - ch_ribo_db = file(params.ribo_database_manifest) - if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"} - - Channel.from(ch_ribo_db.readLines()) - .map { row -> file(row, checkIfExists: true) } - .collect() - .map { [ 'rrna_refs', it ] } - .set { ch_sortmerna_fastas } - - if (make_sortmerna_index) { - SORTMERNA_INDEX ( - [ [],[] ], - ch_sortmerna_fastas, - [ [],[] ] - ) - ch_sortmerna_index = SORTMERNA_INDEX.out.index.first() - } - - SORTMERNA ( - ch_filtered_reads, - ch_sortmerna_fastas, - ch_sortmerna_index - ) - .reads - .set { ch_filtered_reads } - - ch_multiqc_files = 
ch_multiqc_files.mix(SORTMERNA.out.log.collect{it[1]}) - ch_versions = ch_versions.mix(SORTMERNA.out.versions.first()) - } - - // - // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness - // - - // Branch FastQ channels if 'auto' specified to infer strandedness - ch_filtered_reads - .branch { - meta, fastq -> - auto_strand : meta.strandedness == 'auto' - return [ meta, fastq ] - known_strand: meta.strandedness != 'auto' - return [ meta, fastq ] - } - .set { ch_strand_fastq } - - // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created - ch_fasta - .combine(ch_strand_fastq.auto_strand) - .map { it.first() } - .first() - .set { ch_genome_fasta } - - def prepare_tool_indices = [] - if (!params.skip_pseudo_alignment && params.pseudo_aligner) { - prepare_tool_indices << params.pseudo_aligner - } - FASTQ_SUBSAMPLE_FQ_SALMON ( - ch_strand_fastq.auto_strand, - ch_genome_fasta, + FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS ( + ch_fastq, + ch_fasta, ch_transcript_fasta, ch_gtf, ch_salmon_index, - !params.salmon_index && !('salmon' in prepare_tool_indices) + ch_sortmerna_index, + ch_bbsplit_index, + ch_ribo_db, + params.skip_bbsplit, + params.skip_fastqc || params.skip_qc, + params.skip_trimming, + params.skip_umi_extract, + !params.salmon_index && params.pseudo_aligner == 'salmon' && !skip_pseudo_alignment, + !params.sortmerna_index && params.remove_ribo_rna, + params.trimmer, + params.min_trimmed_reads, + params.save_trimmed, + params.remove_ribo_rna, + params.with_umi, + params.umi_discard_read, + params.stranded_threshold, + params.unstranded_threshold ) - ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions) - FASTQ_SUBSAMPLE_FQ_SALMON - .out - .lib_format_counts - .join(ch_strand_fastq.auto_strand) - .map { - meta, json, reads -> - def salmon_strand_analysis = getSalmonInferredStrandedness(json, stranded_threshold=params.stranded_threshold, 
unstranded_threshold=params.unstranded_threshold) - strandedness = salmon_strand_analysis.inferred_strandedness - if (strandedness == 'undetermined') { - strandedness = 'unstranded' - } - return [ meta + [ strandedness: strandedness, salmon_strand_analysis: salmon_strand_analysis ], reads ] - } - .mix(ch_strand_fastq.known_strand) - .set { ch_strand_inferred_filtered_fastq } + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS.out.multiqc_files) + ch_versions = ch_versions.mix(FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS.out.versions) + ch_strand_inferred_filtered_fastq = FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS.out.reads // // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with Salmon @@ -870,11 +708,14 @@ workflow RNASEQ { // Provide MultiQC with rename patterns to ensure it uses sample names // for single-techrep samples not processed by CAT_FASTQ. - ch_name_replacements = ch_fastq.single + ch_name_replacements = ch_fastq + .filter{ meta, reads -> + reads.size() == 1 + } .map{ meta, reads -> - def name1 = file(reads[0]).simpleName + "\t" + meta.id + '_1' + def name1 = file(reads[0][0]).simpleName + "\t" + meta.id + '_1' if (reads[1] ){ - def name2 = file(reads[1]).simpleName + "\t" + meta.id + '_2' + def name2 = file(reads[0][1]).simpleName + "\t" + meta.id + '_2' return [ name1, name2 ] } else{ return name1 From 2389882790283afd7fdc2b3cf9fd2b53ea5d5b31 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 16 Jul 2024 11:09:37 +0000 Subject: [PATCH 02/11] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1e40c3cb..ac293d86a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -104,6 +104,7 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements - [PR #1336](https://github.com/nf-core/rnaseq/pull/1334) - Use nf-core/setup-nf-test to install nf-test from cache during CI/CD - [PR #1340](https://github.com/nf-core/rnaseq/pull/1340) - 
Remove out-of-date Azure specific guidance - [PR #1341](https://github.com/nf-core/rnaseq/pull/1341) - Add rename in the MultiQC report for samples without techreps +- [PR #1342](https://github.com/nf-core/rnaseq/pull/1342) - Factor out preprocessing ### Parameters From 4d308443227a6431e118456c2700a3f20bdab2df Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 17 Jul 2024 09:16:57 +0100 Subject: [PATCH 03/11] Update RNAseq preprocessing swf --- .../nf-core/fastq_qc_trim_filter_setstrandedness/main.nf | 6 +++--- .../fastq_qc_trim_filter_setstrandedness/tests/main.nf.test | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf index a17fc0b2b..ac8217ee9 100644 --- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf @@ -104,7 +104,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS { remove_ribo_rna // boolean: true/false: whether to run sortmerna to remove rrnas with_umi // boolean: true/false: Enable UMI-based read deduplication. umi_discard_read // integer: 0, 1 or 2 - stranded_threshold // float: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. Must be at least 0.5 + stranded_threshold // float: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. 
Must be at least 0.5 unstranded_threshold // float: The difference in fraction of stranded reads assigned to 'forward' and 'reverse' below which a sample is classified as 'unstranded' main: @@ -296,8 +296,8 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS { FASTQ_SUBSAMPLE_FQ_SALMON .out - .lib_format_counts - .join(ch_strand_fastq.auto_strand) + .lib_format_counts + .join(ch_strand_fastq.auto_strand) .map { meta, json, reads -> def salmon_strand_analysis = getSalmonInferredStrandedness(json, stranded_threshold=stranded_threshold, unstranded_threshold=unstranded_threshold) diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test index b58ff098e..ad5eef725 100644 --- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test @@ -56,6 +56,8 @@ nextflow_workflow { input[17] = true // remove_ribo_rna input[18] = false // with_umi input[19] = 0 // umi_discard_read + input[20] = 0.8 // stranded_threshold + input[21] = 0.1 // unstranded_threshold """ } } @@ -112,6 +114,8 @@ nextflow_workflow { input[17] = true // remove_ribo_rna input[18] = false // with_umi input[19] = 0 // umi_discard_read + input[20] = 0.8 // stranded_threshold + input[21] = 0.1 // unstranded_threshold """ } } From 9e8f1e0b4d3a98b0e07e48377f363beeb689e8fc Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 17 Jul 2024 08:26:58 +0000 Subject: [PATCH 04/11] update modules.json --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 346597902..860d54c94 100644 --- a/modules.json +++ b/modules.json @@ -324,7 +324,7 @@ }, "fastq_qc_trim_filter_setstrandedness": { "branch": "master", - "git_sha": "81ed776ec05bf9ae89da3442c29b5c5c7afa807b", + "git_sha": "0a8d33545937d7ab061d0fd2499dc23891ddb6e0", "installed_by": ["subworkflows"] }, 
"fastq_subsample_fq_salmon": { From 6a22103bdd6615ebf53c661e20b7b3f7166e6d3f Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 17 Jul 2024 09:20:38 +0000 Subject: [PATCH 05/11] fix up ribo db wiring --- main.nf | 2 +- subworkflows/local/prepare_genome/main.nf | 12 +++++------- .../fastq_qc_trim_filter_setstrandedness/main.nf | 5 ++--- .../fastq_qc_trim_filter_setstrandedness/meta.yml | 6 +++--- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/main.nf b/main.nf index 152c320e4..a0a6f1c7d 100755 --- a/main.nf +++ b/main.nf @@ -117,7 +117,7 @@ workflow NFCORE_RNASEQ { PREPARE_GENOME.out.salmon_index, PREPARE_GENOME.out.kallisto_index, PREPARE_GENOME.out.bbsplit_index, - PREPARE_GENOME.out.ribo_db, + PREPARE_GENOME.out.rrna_fastas, PREPARE_GENOME.out.sortmerna_index, PREPARE_GENOME.out.splicesites, !params.remove_ribo_rna && params.remove_ribo_rna diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 050395442..92f8cc04a 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -228,10 +228,10 @@ workflow PREPARE_GENOME { // Uncompress sortmerna index or generate from scratch if required // ch_sortmerna_index = Channel.empty() - ch_ribo_db = Channel.empty() + ch_rrna_fastas = Channel.empty() if ('sortmerna' in prepare_tool_indices) { - ch_ribo_db = file(sortmerna_fasta_list) + ribo_db = file(sortmerna_fasta_list) if (sortmerna_index) { if (sortmerna_index.endsWith('.tar.gz')) { @@ -241,14 +241,12 @@ workflow PREPARE_GENOME { ch_sortmerna_index = Channel.value(file(sortmerna_index)) } } else { - ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()) + ch_rrna_fastas = Channel.from(ribo_db.readLines()) .map { row -> file(row, checkIfExists: true) } - .collect() - .map { [ 'rrna_refs', it ] } SORTMERNA_INDEX ( Channel.of([ [],[] ]), - ch_sortmerna_fastas, + ch_rrna_fastas.collect().map { [ 'rrna_refs', it ] }, Channel.of([ [],[] ]) ) 
ch_sortmerna_index = SORTMERNA_INDEX.out.index.first() @@ -374,7 +372,7 @@ workflow PREPARE_GENOME { chrom_sizes = ch_chrom_sizes // channel: path(genome.sizes) splicesites = ch_splicesites // channel: path(genome.splicesites.txt) bbsplit_index = ch_bbsplit_index // channel: path(bbsplit/index/) - ribo_db = ch_ribo_db // channel: path(sortmerna_fasta_list) + rrna_fastas = ch_rrna_fastas // channel: path(sortmerna_fasta_list) sortmerna_index = ch_sortmerna_index // channel: path(sortmerna/index/) star_index = ch_star_index // channel: path(star/index/) rsem_index = ch_rsem_index // channel: path(rsem/index/) diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf index ac8217ee9..0651753e1 100644 --- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf @@ -91,7 +91,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS { ch_salmon_index // channel: /path/to/salmon/index/ (optional) ch_sortmerna_index // channel: /path/to/sortmerna/index/ (optional) ch_bbsplit_index // channel: /path/to/bbsplit/index/ (optional) - ch_ribo_db // channel: /path/to/ Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. (optional) + ch_rrna_fastas // channel: one or more fasta files containing rrna sequencs passed to SortMeRNA (optional) skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads. 
skip_fastqc // boolean: true/false skip_trimming // boolean: true/false @@ -233,8 +233,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS { // MODULE: Remove ribosomal RNA reads // if (remove_ribo_rna) { - ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()) - .map { row -> file(row, checkIfExists: true) } + ch_sortmerna_fastas = ch_rrna_fastas .collect() .map{ ['rrna_refs', it] } diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml index cf7cd885f..6f92f56a0 100644 --- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml @@ -47,11 +47,11 @@ input: - ch_bbsplit_index: type: file description: Path to directory or tar.gz archive for pre-built BBSplit index - - ch_ribo_db: + - ch_rrna_fastas: type: file description: | - Channel with text file containing paths to fasta files (one per line) - that will be used to create the database for SortMeRNA + Channel containing one or more FASTA files containing rRNA sequences + for use with SortMeRNA - skip_bbsplit: type: boolean description: Whether to skip BBSplit for removal of non-reference genome reads From 2447c6b76771860ed00900419f2e5bf4fa841a51 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 17 Jul 2024 09:26:48 +0000 Subject: [PATCH 06/11] Install swf from branch for now --- modules.json | 4 ++-- .../nf-core/fastq_qc_trim_filter_setstrandedness/main.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules.json b/modules.json index 860d54c94..310fb392e 100644 --- a/modules.json +++ b/modules.json @@ -323,8 +323,8 @@ "installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"] }, "fastq_qc_trim_filter_setstrandedness": { - "branch": "master", - "git_sha": "0a8d33545937d7ab061d0fd2499dc23891ddb6e0", + "branch": "rrna_prerpo_rrna_should_be_channel", + "git_sha": 
"d1e4c1977912284d6c4c1a1b351968cb6fcf1b6d", "installed_by": ["subworkflows"] }, "fastq_subsample_fq_salmon": { diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf index 0651753e1..cd69166f3 100644 --- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf @@ -91,7 +91,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS { ch_salmon_index // channel: /path/to/salmon/index/ (optional) ch_sortmerna_index // channel: /path/to/sortmerna/index/ (optional) ch_bbsplit_index // channel: /path/to/bbsplit/index/ (optional) - ch_rrna_fastas // channel: one or more fasta files containing rrna sequencs passed to SortMeRNA (optional) + ch_rrna_fastas // channel: one or more fasta files containing rrna sequences to be passed to SortMeRNA (optional) skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads. 
skip_fastqc // boolean: true/false skip_trimming // boolean: true/false From 3da0bc3ec5c0c6259dc1537230d24b070351366b Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 17 Jul 2024 09:43:04 +0000 Subject: [PATCH 07/11] Fix tests --- .../tests/main.nf.test | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test index ad5eef725..038af9e30 100644 --- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test @@ -33,9 +33,6 @@ nextflow_workflow { ] ]) - ch_ribo_db = file('ribo_db.txt') - ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta') - input[0] = ch_reads input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta @@ -43,7 +40,7 @@ nextflow_workflow { input[4] = [] // ch_salmon_index input[5] = [] // ch_sortmerna_index input[6] = [] // ch_bbsplit_index - input[7] = ch_ribo_db // ch_ribo_db + input[7] = Channel.of(file('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta', checkIfExists: true)) // ch_rrna_fastas input[8] = true // skip_bbsplit input[9] = false // skip_fastqc input[10] = false // skip_trimming @@ -91,9 +88,6 @@ nextflow_workflow { ] ]) - ch_ribo_db = file('ribo_db.txt') - ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta') - input[0] = ch_reads input[1] = Channel.of(file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta @@ -101,7 +95,7 @@ nextflow_workflow { input[4] = [] // ch_salmon_index input[5] = [] // ch_sortmerna_index input[6] = [] // ch_bbsplit_index - input[7] = ch_ribo_db // ch_ribo_db + input[7] = Channel.of(file('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta', checkIfExists: true)) // ch_rrna_fastas input[8] = true // skip_bbsplit input[9] = false // skip_fastqc input[10] = false // skip_trimming From 259dccfed00a19a18911739ec2031ea2b502ccea Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 17 Jul 2024 09:45:10 +0000 Subject: [PATCH 08/11] Update modules.json --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 310fb392e..f16d62bbf 100644 --- a/modules.json +++ b/modules.json @@ -324,7 +324,7 @@ }, "fastq_qc_trim_filter_setstrandedness": { "branch": "rrna_prerpo_rrna_should_be_channel", - "git_sha": "d1e4c1977912284d6c4c1a1b351968cb6fcf1b6d", + "git_sha": "d41517e03cd875b9b469a0d27e31c7afd9d2250f", "installed_by": ["subworkflows"] }, "fastq_subsample_fq_salmon": { From fe950ba0b07aa2103ae250bfd24afb64104ebcbf Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 17 Jul 2024 10:32:50 +0000 Subject: [PATCH 09/11] Move strandedness function testing to swf --- modules.json | 2 +- .../tests/main.function.nf.test | 109 ---------------- .../tests/main.function.nf.test.snap | 45 ------- .../tests/main.function.nf.test | 118 ++++++++++++++++++ .../tests/main.function.nf.test.snap | 47 +++++++ .../tests/main.nf.test | 2 + .../tests/main.nf.test.snap | 40 +++--- 7 files changed, 188 insertions(+), 175 deletions(-) create mode 100644 
subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test.snap diff --git a/modules.json b/modules.json index f16d62bbf..9d8b94799 100644 --- a/modules.json +++ b/modules.json @@ -324,7 +324,7 @@ }, "fastq_qc_trim_filter_setstrandedness": { "branch": "rrna_prerpo_rrna_should_be_channel", - "git_sha": "d41517e03cd875b9b469a0d27e31c7afd9d2250f", + "git_sha": "e9fde2d1f5d38e2ed61c217aef44e23e8ed07103", "installed_by": ["subworkflows"] }, "fastq_subsample_fq_salmon": { diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test b/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test index c809b2117..8390034c5 100644 --- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test +++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test @@ -390,115 +390,6 @@ nextflow_function { } - test("Test Function getSalmonInferredStrandedness unstranded") { - - function "getSalmonInferredStrandedness" - - when { - function { - """ - import groovy.json.JsonOutput - - // Define the JSON contents for the test - def json_contents = JsonOutput.toJson([ - "SF": 0, - "SR": 0, - "ISF": 100, - "ISR": 100, - "IU": 0, - "U": 0 - ]) - def jsonFile = file("${workDir}/salmonUnstranded.json") - jsonFile.write(json_contents) - - input[0] = jsonFile - input[1] = 0.8 - input[2] = 0.1 - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - - } - - test("Test Function getSalmonInferredStrandedness forward") { - - function "getSalmonInferredStrandedness" - - when { - function { - """ - import groovy.json.JsonOutput - - def json_contents = JsonOutput.toJson([ - "SF": 0, - "SR": 0, - "ISF": 100, - "ISR": 0, - "IU": 0, - "U": 0 - ]) - def jsonFile = file("${workDir}/salmonForward.json") - 
jsonFile.write(json_contents) - - input[0] = jsonFile - input[1] = 0.8 - input[2] = 0.1 - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - - } - - test("Test Function getSalmonInferredStrandedness reverse") { - - function "getSalmonInferredStrandedness" - - when { - function { - """ - import groovy.json.JsonOutput - - def json_contents = JsonOutput.toJson([ - "SF": 0, - "SR": 0, - "ISF": 0, - "ISR": 100, - "IU": 0, - "U": 0 - ]) - def jsonFile = file("${workDir}/salmonReverse.json") - jsonFile.write(json_contents) - - input[0] = jsonFile - input[1] = 0.8 - input[2] = 0.1 - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - - } - test("Test Function getStarPercentMapped pass") { function "getStarPercentMapped" diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test.snap b/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test.snap index dd632fbc3..ed2d57860 100644 --- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test.snap +++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test.snap @@ -45,36 +45,6 @@ }, "timestamp": "2024-03-06T14:33:26.903306" }, - "Test Function getSalmonInferredStrandedness unstranded": { - "content": [ - { - "inferred_strandedness": "unstranded", - "forwardFragments": 50.0, - "reverseFragments": 50.0, - "unstrandedFragments": 0.0 - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-06-18T14:29:54.96715" - }, - "Test Function getSalmonInferredStrandedness reverse": { - "content": [ - { - "inferred_strandedness": "reverse", - "forwardFragments": 0.0, - "reverseFragments": 100.0, - "unstrandedFragments": 0.0 - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-06-18T14:30:11.417381" - }, "Test Function 
checkSamplesAfterGrouping invalid strandedness": { "content": null, "meta": { @@ -215,21 +185,6 @@ }, "timestamp": "2024-03-06T14:32:49.565504" }, - "Test Function getSalmonInferredStrandedness forward": { - "content": [ - { - "inferred_strandedness": "forward", - "forwardFragments": 100.0, - "reverseFragments": 0.0, - "unstrandedFragments": 0.0 - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-06-18T14:30:03.301262" - }, "Test Function rsemStarIndexWarn": { "content": null, "meta": { diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test new file mode 100644 index 000000000..57fcd302b --- /dev/null +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test @@ -0,0 +1,118 @@ +nextflow_function { + + name "Test Functions" + script "../main.nf" + + // + // Test function for deriving strandedness from Salmon numbers + // + + test("Test Function getSalmonInferredStrandedness unstranded") { + + function "getSalmonInferredStrandedness" + + when { + function { + """ + import groovy.json.JsonOutput + + // Define the JSON contents for the test + def json_contents = JsonOutput.toJson([ + "SF": 0, + "SR": 0, + "ISF": 100, + "ISR": 100, + "IU": 0, + "U": 0 + ]) + def jsonFile = file("${workDir}/salmonUnstranded.json") + jsonFile.write(json_contents) + + input[0] = jsonFile + input[1] = 0.8 + input[2] = 0.1 + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + + } + + test("Test Function getSalmonInferredStrandedness forward") { + + function "getSalmonInferredStrandedness" + + when { + function { + """ + import groovy.json.JsonOutput + + def json_contents = JsonOutput.toJson([ + "SF": 0, + "SR": 0, + "ISF": 100, + "ISR": 0, + "IU": 0, + "U": 0 + ]) + def jsonFile = file("${workDir}/salmonForward.json") 
+ jsonFile.write(json_contents) + + input[0] = jsonFile + input[1] = 0.8 + input[2] = 0.1 + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + + } + + test("Test Function getSalmonInferredStrandedness reverse") { + + function "getSalmonInferredStrandedness" + + when { + function { + """ + import groovy.json.JsonOutput + + def json_contents = JsonOutput.toJson([ + "SF": 0, + "SR": 0, + "ISF": 0, + "ISR": 100, + "IU": 0, + "U": 0 + ]) + def jsonFile = file("${workDir}/salmonReverse.json") + jsonFile.write(json_contents) + + input[0] = jsonFile + input[1] = 0.8 + input[2] = 0.1 + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + + } +} diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test.snap b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test.snap new file mode 100644 index 000000000..4b4e2e874 --- /dev/null +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test.snap @@ -0,0 +1,47 @@ +{ + "Test Function getSalmonInferredStrandedness forward": { + "content": [ + { + "inferred_strandedness": "forward", + "forwardFragments": 100.0, + "reverseFragments": 0.0, + "unstrandedFragments": 0.0 + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T10:28:59.284820592" + }, + "Test Function getSalmonInferredStrandedness unstranded": { + "content": [ + { + "inferred_strandedness": "unstranded", + "forwardFragments": 50.0, + "reverseFragments": 50.0, + "unstrandedFragments": 0.0 + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T10:28:47.514036696" + }, + "Test Function getSalmonInferredStrandedness reverse": { + "content": [ + { + "inferred_strandedness": "reverse", + "forwardFragments": 0.0, + "reverseFragments": 100.0, + 
"unstrandedFragments": 0.0 + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-17T10:29:11.43961965" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test index 038af9e30..5242f2bee 100644 --- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test @@ -130,4 +130,6 @@ nextflow_workflow { ) } } + + } diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap index 21bdb2492..985349317 100644 --- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap @@ -14,17 +14,17 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-24T16:19:13.057802" + "timestamp": "2024-07-17T10:24:00.044553245" }, "trimgalore_test_pe_reads_2_lines": { "content": "eccf3e9e74589ff01c77fce7f4548e41", "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-24T17:44:07.667653" + "timestamp": "2024-07-17T10:24:26.838793051" }, "fastp_test_pe_reads_1_size": { "content": [ @@ -32,9 +32,9 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-24T17:43:46.173892" + "timestamp": "2024-07-17T10:23:59.889337984" }, "trimgalore_test_pe_reads_1_size": { "content": [ @@ -42,25 +42,25 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-24T17:44:07.642318" + "timestamp": "2024-07-17T10:24:26.778599725" }, "trimgalore_test_pe_reads_1_lines": { "content": 
"3868fc1caf09367141d2bbf47e158823", "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-24T17:44:07.641186" + "timestamp": "2024-07-17T10:24:26.774975135" }, "fastp_test_pe_reads_2_lines": { "content": "eccf3e9e74589ff01c77fce7f4548e41", "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-24T17:43:46.235022" + "timestamp": "2024-07-17T10:23:59.997625278" }, "fastp_test_pe_reads_2_size": { "content": [ @@ -68,9 +68,9 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-24T17:43:46.242006" + "timestamp": "2024-07-17T10:24:00.042449965" }, "trimgalore_test_pe_reads_2_size": { "content": [ @@ -78,17 +78,17 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-24T17:44:07.668644" + "timestamp": "2024-07-17T10:24:26.841434261" }, "fastp_test_pe_reads_1_lines": { "content": "3868fc1caf09367141d2bbf47e158823", "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-24T17:43:46.161535" + "timestamp": "2024-07-17T10:23:59.882844295" }, "trimgalore_read_count": { "content": [ @@ -105,8 +105,8 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-02-24T17:44:07.669435" + "timestamp": "2024-07-17T10:24:26.84402498" } } \ No newline at end of file From 6bb1a791ad4ec46f563686957a99682da79db06e Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 17 Jul 2024 12:12:14 +0100 Subject: [PATCH 10/11] Install from modules master --- modules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index 9d8b94799..8d89cfcbb 100644 --- a/modules.json +++ b/modules.json @@ -323,8 +323,8 @@ "installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"] }, "fastq_qc_trim_filter_setstrandedness": { - "branch": 
"rrna_prerpo_rrna_should_be_channel", - "git_sha": "e9fde2d1f5d38e2ed61c217aef44e23e8ed07103", + "branch": "master", + "git_sha": "b86de50ab60c19ab40e70a4501820f4cb307050b", "installed_by": ["subworkflows"] }, "fastq_subsample_fq_salmon": { From 1049fa57dd5cb93efb48f4a3d4733d44b54812fb Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 17 Jul 2024 13:16:21 +0000 Subject: [PATCH 11/11] Strip preprocessing components relocated to subworkflows --- workflows/rnaseq/main.nf | 7 ------- 1 file changed, 7 deletions(-) diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index f4075b1c3..896159378 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -32,8 +32,6 @@ include { getInferexperimentStrandedness } from '../../subworkflows/local/utils_ // // MODULE: Installed directly from nf-core/modules // -include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq' -include { BBMAP_BBSPLIT } from '../../modules/nf-core/bbmap/bbsplit' include { DUPRADAR } from '../../modules/nf-core/dupradar' include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort' include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap' @@ -42,8 +40,6 @@ include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/string include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts' include { MULTIQC } from '../../modules/nf-core/multiqc' include { UMITOOLS_PREPAREFORRSEM as UMITOOLS_PREPAREFORSALMON } from '../../modules/nf-core/umitools/prepareforrsem' -include { SORTMERNA } from '../../modules/nf-core/sortmerna' -include { SORTMERNA as SORTMERNA_INDEX } from '../../modules/nf-core/sortmerna' include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_FW } from '../../modules/nf-core/bedtools/genomecov' include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_REV } from '../../modules/nf-core/bedtools/genomecov' @@ -54,9 +50,6 @@ include { paramsSummaryMap } from 'plugin/nf-validation' include { fromSamplesheet } from 
'plugin/nf-validation' include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' -include { FASTQ_SUBSAMPLE_FQ_SALMON } from '../../subworkflows/nf-core/fastq_subsample_fq_salmon' -include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore' -include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../../subworkflows/nf-core/fastq_fastqc_umitools_fastp' include { FASTQ_ALIGN_HISAT2 } from '../../subworkflows/nf-core/fastq_align_hisat2' include { BAM_SORT_STATS_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_stats_samtools' include { BAM_MARKDUPLICATES_PICARD } from '../../subworkflows/nf-core/bam_markduplicates_picard'