From 731538691d49544b92c78bdc6d9547026eba2419 Mon Sep 17 00:00:00 2001
From: Jonathan Manning <pininforthefjords@gmail.com>
Date: Tue, 16 Jul 2024 10:58:40 +0000
Subject: [PATCH 01/11] Factor out preprocessing logic to nf-core subworkflow

---
 main.nf                                       |   1 +
 modules.json                                  |  15 +-
 subworkflows/local/prepare_genome/main.nf     |   7 +-
 .../utils_nfcore_rnaseq_pipeline/main.nf      |  58 +---
 .../main.nf                                   | 320 ++++++++++++++++++
 .../meta.yml                                  | 132 ++++++++
 .../tests/main.nf.test                        | 135 ++++++++
 .../tests/main.nf.test.snap                   | 112 ++++++
 .../tests/nextflow.config                     |  27 ++
 .../tests/tags.yml                            |   2 +
 workflows/rnaseq/main.nf                      | 225 ++----------
 11 files changed, 779 insertions(+), 255 deletions(-)
 create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
 create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml
 create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
 create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap
 create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/nextflow.config
 create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/tags.yml

diff --git a/main.nf b/main.nf
index d3de0ae0b..152c320e4 100755
--- a/main.nf
+++ b/main.nf
@@ -117,6 +117,7 @@ workflow NFCORE_RNASEQ {
         PREPARE_GENOME.out.salmon_index,
         PREPARE_GENOME.out.kallisto_index,
         PREPARE_GENOME.out.bbsplit_index,
+        PREPARE_GENOME.out.ribo_db,
         PREPARE_GENOME.out.sortmerna_index,
         PREPARE_GENOME.out.splicesites,
         !params.remove_ribo_rna && params.remove_ribo_rna
diff --git a/modules.json b/modules.json
index b0125259a..346597902 100644
--- a/modules.json
+++ b/modules.json
@@ -8,7 +8,7 @@
                     "bbmap/bbsplit": {
                         "branch": "master",
                         "git_sha": "2c6b1144ed58b6184ad58fc4e6b6a90219b4bf4f",
-                        "installed_by": ["modules"]
+                        "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"]
                     },
                     "bedtools/genomecov": {
                         "branch": "master",
@@ -18,7 +18,7 @@
                     "cat/fastq": {
                         "branch": "master",
                         "git_sha": "4fc983ad0b30e6e32696fa7d980c76c7bfe1c03e",
-                        "installed_by": ["modules"]
+                        "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"]
                     },
                     "custom/catadditionalfasta": {
                         "branch": "master",
@@ -202,7 +202,7 @@
                     "sortmerna": {
                         "branch": "master",
                         "git_sha": "df05c8db5195867c0bc7b92c1788115b66f0d17d",
-                        "installed_by": ["modules"]
+                        "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"]
                     },
                     "star/align": {
                         "branch": "master",
@@ -315,17 +315,22 @@
                     "fastq_fastqc_umitools_fastp": {
                         "branch": "master",
                         "git_sha": "db35d26edeafacf9906a517827df621a29adc13d",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"]
                     },
                     "fastq_fastqc_umitools_trimgalore": {
                         "branch": "master",
                         "git_sha": "cb6defa0834eda9d6d3f967e981c819fc3e257bf",
+                        "installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"]
+                    },
+                    "fastq_qc_trim_filter_setstrandedness": {
+                        "branch": "master",
+                        "git_sha": "81ed776ec05bf9ae89da3442c29b5c5c7afa807b",
                         "installed_by": ["subworkflows"]
                     },
                     "fastq_subsample_fq_salmon": {
                         "branch": "master",
                         "git_sha": "727232afb8294b53dd9d05bfe469b70cce1675bb",
-                        "installed_by": ["subworkflows"]
+                        "installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"]
                     },
                     "quantify_pseudo_alignment": {
                         "branch": "master",
diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf
index 1e4ee46fd..050395442 100644
--- a/subworkflows/local/prepare_genome/main.nf
+++ b/subworkflows/local/prepare_genome/main.nf
@@ -228,7 +228,11 @@ workflow PREPARE_GENOME {
     // Uncompress sortmerna index or generate from scratch if required
     //
     ch_sortmerna_index = Channel.empty()
+    ch_ribo_db = Channel.empty()
+
     if ('sortmerna' in prepare_tool_indices) {
+        ch_ribo_db = file(sortmerna_fasta_list)
+
         if (sortmerna_index) {
             if (sortmerna_index.endsWith('.tar.gz')) {
                 ch_sortmerna_index = UNTAR_SORTMERNA_INDEX ( [ [:], sortmerna_index ] ).untar.map { it[1] }
@@ -237,7 +241,7 @@ workflow PREPARE_GENOME {
                 ch_sortmerna_index = Channel.value(file(sortmerna_index))
             }
         } else {
-            ch_sortmerna_fastas = Channel.from(file(sortmerna_fasta_list).readLines())
+            ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
                 .map { row -> file(row, checkIfExists: true) }
                 .collect()
                 .map { [ 'rrna_refs', it ] }
@@ -370,6 +374,7 @@ workflow PREPARE_GENOME {
     chrom_sizes      = ch_chrom_sizes            // channel: path(genome.sizes)
     splicesites      = ch_splicesites            // channel: path(genome.splicesites.txt)
     bbsplit_index    = ch_bbsplit_index          // channel: path(bbsplit/index/)
+    ribo_db          = ch_ribo_db                // channel: path(sortmerna_fasta_list)
     sortmerna_index  = ch_sortmerna_index        // channel: path(sortmerna/index/)
     star_index       = ch_star_index             // channel: path(star/index/)
     rsem_index       = ch_rsem_index             // channel: path(rsem/index/)
diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
index fe080ce13..8022a5c1b 100644
--- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf
@@ -20,6 +20,7 @@ include { imNotification            } from '../../nf-core/utils_nfcore_pipeline'
 include { UTILS_NFCORE_PIPELINE     } from '../../nf-core/utils_nfcore_pipeline'
 include { workflowCitation          } from '../../nf-core/utils_nfcore_pipeline'
 include { logColours                } from '../../nf-core/utils_nfcore_pipeline'
+include { calculateStrandedness     } from '../../nf-core/fastq_qc_trim_filter_setstrandedness'
 
 /*
 ========================================================================================
@@ -548,63 +549,6 @@ def biotypeInGtf(gtf_file, biotype) {
     }
 }
 
-//
-// Function to determine library type by comparing type counts. Consistent
-// between Salmon and RSeQC
-//
-def calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold=0.8, unstranded_threshold=0.1) {
-    def totalFragments = forwardFragments + reverseFragments + unstrandedFragments
-    def totalStrandedFragments = forwardFragments + reverseFragments
-
-    def library_strandedness = 'undetermined'
-    if (totalStrandedFragments > 0) {
-        def forwardProportion = forwardFragments / (totalStrandedFragments as double)
-        def reverseProportion = reverseFragments / (totalStrandedFragments as double)
-        def proportionDifference = Math.abs(forwardProportion - reverseProportion)
-
-        if (forwardProportion >= stranded_threshold) {
-            strandedness = 'forward'
-        } else if (reverseProportion >= stranded_threshold) {
-            strandedness = 'reverse'
-        } else if (proportionDifference <= unstranded_threshold) {
-            strandedness = 'unstranded'
-        }
-    }
-
-    return [
-        inferred_strandedness: strandedness,
-        forwardFragments: (forwardFragments / (totalFragments as double)) * 100,
-        reverseFragments: (reverseFragments / (totalFragments as double)) * 100,
-        unstrandedFragments: (unstrandedFragments / (totalFragments as double)) * 100
-    ]
-}
-
-//
-// Function that parses Salmon quant 'lib_format_counts.json' output file to get inferred strandedness
-//
-def getSalmonInferredStrandedness(json_file, stranded_threshold = 0.8, unstranded_threshold = 0.1) {
-    // Parse the JSON content of the file
-    def libCounts = new JsonSlurper().parseText(json_file.text)
-
-    // Calculate the counts for forward and reverse strand fragments
-    def forwardKeys = ['SF', 'ISF', 'MSF', 'OSF']
-    def reverseKeys = ['SR', 'ISR', 'MSR', 'OSR']
-
-    // Calculate unstranded fragments (IU and U)
-    // NOTE: this is here for completeness, but actually all fragments have a
-    // strandedness (even if the overall library does not), so all these values
-    // will be '0'. See
-    // https://groups.google.com/g/sailfish-users/c/yxzBDv6NB6I
-    def unstrandedKeys = ['IU', 'U', 'MU']
-
-    def forwardFragments = forwardKeys.collect { libCounts[it] ?: 0 }.sum()
-    def reverseFragments = reverseKeys.collect { libCounts[it] ?: 0 }.sum()
-    def unstrandedFragments = unstrandedKeys.collect { libCounts[it] ?: 0 }.sum()
-
-    // Use shared calculation function to determine strandedness
-    return calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold, unstranded_threshold)
-}
-
 //
 // Function that parses RSeQC infer_experiment output file to get inferred strandedness
 //
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
new file mode 100644
index 000000000..a17fc0b2b
--- /dev/null
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
@@ -0,0 +1,320 @@
+import groovy.json.JsonSlurper
+
+include { BBMAP_BBSPLIT                   } from '../../../modules/nf-core/bbmap/bbsplit'
+include { CAT_FASTQ                       } from '../../../modules/nf-core/cat/fastq/main'
+include { SORTMERNA                       } from '../../../modules/nf-core/sortmerna/main'
+include { SORTMERNA as SORTMERNA_INDEX    } from '../../../modules/nf-core/sortmerna/main'
+
+include { FASTQ_SUBSAMPLE_FQ_SALMON        } from '../fastq_subsample_fq_salmon'
+include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../fastq_fastqc_umitools_trimgalore'
+include { FASTQ_FASTQC_UMITOOLS_FASTP      } from '../fastq_fastqc_umitools_fastp'
+
+def pass_trimmed_reads = [:]
+
+//
+// Function to determine library type by comparing type counts.
+// Returns a map holding 'inferred_strandedness' ('forward', 'reverse', 'unstranded'
+// or 'undetermined') plus each fragment class as a percentage of all fragments.
+//
+def calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold=0.8, unstranded_threshold=0.1) {
+    def totalFragments = forwardFragments + reverseFragments + unstrandedFragments
+    def totalStrandedFragments = forwardFragments + reverseFragments
+    // Local result; stays 'undetermined' unless one of the thresholds below is met
+    def library_strandedness = 'undetermined'
+    if (totalStrandedFragments > 0) {
+        def forwardProportion = forwardFragments / (totalStrandedFragments as double)
+        def reverseProportion = reverseFragments / (totalStrandedFragments as double)
+        def proportionDifference = Math.abs(forwardProportion - reverseProportion)
+
+        if (forwardProportion >= stranded_threshold) {
+            library_strandedness = 'forward'
+        } else if (reverseProportion >= stranded_threshold) {
+            library_strandedness = 'reverse'
+        } else if (proportionDifference <= unstranded_threshold) {
+            library_strandedness = 'unstranded'
+        }
+    }
+
+    return [
+        inferred_strandedness: library_strandedness,
+        forwardFragments: (forwardFragments / (totalFragments as double)) * 100,
+        reverseFragments: (reverseFragments / (totalFragments as double)) * 100,
+        unstrandedFragments: (unstrandedFragments / (totalFragments as double)) * 100
+    ]
+}
+
+//
+// Infer library strandedness from a Salmon quant 'lib_format_counts.json' output file
+//
+def getSalmonInferredStrandedness(json_file, stranded_threshold = 0.8, unstranded_threshold = 0.1) {
+    // Keys of the JSON document, grouped by the strand they are counted towards
+    def forwardKeys = ['SF', 'ISF', 'MSF', 'OSF']
+    def reverseKeys = ['SR', 'ISR', 'MSR', 'OSR']
+
+    // Unstranded keys (IU, U and MU) are tallied for completeness only: all
+    // fragments have a strandedness (even if the overall library does not),
+    // so all these values will be '0'. See
+    // https://groups.google.com/g/sailfish-users/c/yxzBDv6NB6I
+    def unstrandedKeys = ['IU', 'U', 'MU']
+
+    // Read the per-key fragment counts out of the JSON text
+    def libCounts = new JsonSlurper().parseText(json_file.text)
+
+    // Sum the counts of a group of keys, counting absent keys as zero
+    def tally = { keys -> keys.collect { libCounts[it] ?: 0 }.sum() }
+
+    // Hand the three totals to the shared calculation function
+    return calculateStrandedness(
+        tally(forwardKeys), tally(reverseKeys), tally(unstrandedKeys),
+        stranded_threshold, unstranded_threshold)
+}
+
+//
+// Build MultiQC custom-content TSV text: a tab-joined header line followed by
+// the entries of tsv_data, one per line. Gives "" when tsv_data is empty.
+public static String multiqcTsvFromList(tsv_data, header) {
+    // Nothing to report: return the empty string, without a header line
+    if (tsv_data.size() == 0) {
+        return ""
+    }
+    def header_line = header.join('\t')
+    return "${header_line}\n" + tsv_data.join('\n')
+}
+
+workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
+
+    take:
+    ch_reads             // channel: [ val(meta), [ reads ] ]
+    ch_fasta             // channel: /path/to/genome.fasta
+    ch_transcript_fasta  // channel: /path/to/transcript.fasta
+    ch_gtf               // channel: /path/to/genome.gtf
+    ch_salmon_index      // channel: /path/to/salmon/index/ (optional)
+    ch_sortmerna_index   // channel: /path/to/sortmerna/index/ (optional)
+    ch_bbsplit_index     // channel: /path/to/bbsplit/index/ (optional)
+    ch_ribo_db           // file: Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. Read directly with readLines(), so presumably a file object rather than a channel - TODO confirm. (optional)
+    skip_bbsplit         // boolean: Skip BBSplit for removal of non-reference genome reads.
+    skip_fastqc          // boolean: Whether to skip FastQC
+    skip_trimming        // boolean: Whether to skip trimming
+    skip_umi_extract     // boolean: Whether to skip UMI extraction
+    make_salmon_index    // boolean: Whether to create salmon index before running salmon quant
+    make_sortmerna_index // boolean: Whether to create a sortmerna index before running sortmerna
+    trimmer              // string (enum): 'fastp' or 'trimgalore'
+    min_trimmed_reads    // integer: > 0; samples with this many trimmed reads or fewer are listed in fail_trimmed_samples_mqc.tsv
+    save_trimmed         // boolean: Whether to save the trimmed FastQ files
+    remove_ribo_rna      // boolean: true/false: whether to run sortmerna to remove rrnas
+    with_umi             // boolean: true/false: Enable UMI-based read deduplication.
+    umi_discard_read     // integer: 0, 1 or 2
+    stranded_threshold   // float: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. Must be at least 0.5
+    unstranded_threshold // float: The difference in fraction of stranded reads assigned to 'forward' and 'reverse' below which a sample is classified as 'unstranded'
+
+    main:
+
+    ch_versions        = Channel.empty()
+    ch_filtered_reads  = Channel.empty()
+    ch_trim_read_count = Channel.empty()
+    ch_multiqc_files   = Channel.empty()
+
+    ch_reads
+        .branch {
+            meta, fastqs ->
+                single  : fastqs.size() == 1
+                    return [ meta, fastqs.flatten() ]
+                multiple: fastqs.size() > 1
+                    return [ meta, fastqs.flatten() ]
+        }
+        .set { ch_fastq }
+
+    //
+    // MODULE: Concatenate FastQ files from same sample if required
+    //
+    CAT_FASTQ (
+        ch_fastq.multiple
+    )
+    .reads
+    .mix(ch_fastq.single)
+    .set { ch_filtered_reads }
+
+    ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null))
+
+    //
+    // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore!
+    //
+    if (trimmer == 'trimgalore') {
+        FASTQ_FASTQC_UMITOOLS_TRIMGALORE (
+            ch_filtered_reads,
+            skip_fastqc,
+            with_umi,
+            skip_umi_extract,
+            skip_trimming,
+            umi_discard_read,
+            min_trimmed_reads
+        )
+        ch_filtered_reads      = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads
+        ch_trim_read_count     = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count
+
+        ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions)
+        ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip
+            .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip)
+            .mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log)
+            .mix(ch_multiqc_files)
+    }
+
+    //
+    // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp
+    //
+    if (trimmer == 'fastp') {
+        FASTQ_FASTQC_UMITOOLS_FASTP (
+            ch_filtered_reads,
+            skip_fastqc,
+            with_umi,
+            skip_umi_extract,
+            umi_discard_read,
+            skip_trimming,
+            [],
+            save_trimmed,
+            save_trimmed,
+            min_trimmed_reads
+        )
+        ch_filtered_reads      = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
+        ch_trim_read_count     = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count
+
+        ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
+        ch_multiqc_files = FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip
+            .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip)
+            .mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.map{tuple(it[0], [it[1]])})
+            .mix(ch_multiqc_files)
+    }
+
+    //
+    // Get list of samples that failed trimming threshold for MultiQC report
+    //
+
+    ch_trim_read_count
+        .map {
+            meta, num_reads ->
+                pass_trimmed_reads[meta.id] = true
+                if (num_reads <= min_trimmed_reads.toFloat()) {
+                    pass_trimmed_reads[meta.id] = false
+                    return [ "$meta.id\t$num_reads" ]
+                }
+        }
+        .collect()
+        .map {
+            tsv_data ->
+                def header = ["Sample", "Reads after trimming"]
+                multiqcTsvFromList(tsv_data, header)
+        }
+        .set { ch_fail_trimming_multiqc }
+
+    ch_multiqc_files = ch_multiqc_files
+        .mix(
+            ch_fail_trimming_multiqc.collectFile(name: 'fail_trimmed_samples_mqc.tsv')
+        )
+
+    //
+    // MODULE: Remove genome contaminant reads
+    //
+    if (!skip_bbsplit) {
+        BBMAP_BBSPLIT (
+            ch_filtered_reads,
+            ch_bbsplit_index,
+            [],
+            [ [], [] ],
+            false
+        )
+
+        BBMAP_BBSPLIT.out.primary_fastq
+            .set { ch_filtered_reads }
+
+        ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first())
+    }
+
+    //
+    // MODULE: Remove ribosomal RNA reads
+    //
+    if (remove_ribo_rna) {
+        ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
+            .map { row -> file(row, checkIfExists: true) }
+            .collect()
+            .map{ ['rrna_refs', it] }
+
+        if (make_sortmerna_index) {
+            SORTMERNA_INDEX (
+                [[],[]],
+                ch_sortmerna_fastas,
+                [[],[]]
+            )
+            ch_sortmerna_index = SORTMERNA_INDEX.out.index.first()
+        }
+
+        SORTMERNA (
+            ch_filtered_reads,
+            ch_sortmerna_fastas,
+            ch_sortmerna_index
+        )
+
+        SORTMERNA.out.reads
+            .set { ch_filtered_reads }
+
+        ch_multiqc_files = ch_multiqc_files
+            .mix(SORTMERNA.out.log)
+
+        ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
+    }
+
+    // Branch FastQ channels if 'auto' specified to infer strandedness
+    ch_filtered_reads
+        .branch {
+            meta, fastq ->
+                auto_strand : meta.strandedness == 'auto'
+                    return [ meta, fastq ]
+                known_strand: meta.strandedness != 'auto'
+                    return [ meta, fastq ]
+        }
+        .set { ch_strand_fastq }
+
+    //
+    // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness
+    //
+    // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created
+
+    ch_fasta
+        .combine(ch_strand_fastq.auto_strand)
+        .map { it.first() }
+        .first()
+        .set { ch_genome_fasta }
+
+    FASTQ_SUBSAMPLE_FQ_SALMON (
+        ch_strand_fastq.auto_strand,
+        ch_genome_fasta,
+        ch_transcript_fasta,
+        ch_gtf,
+        ch_salmon_index,
+        make_salmon_index
+    )
+    ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions)
+
+    FASTQ_SUBSAMPLE_FQ_SALMON
+        .out
+        .lib_format_counts                  // joined on meta with the reads below
+        .join(ch_strand_fastq.auto_strand)  // -> [ meta, json, reads ]
+        .map {
+            meta, json, reads ->
+                // Groovy has no named arguments here: pass the thresholds positionally
+                def salmon_strand_analysis = getSalmonInferredStrandedness(json, stranded_threshold, unstranded_threshold)
+                // 'def' keeps the value local to this sample instead of a shared script-level variable;
+                // samples that could not be classified are carried forward as 'unstranded'
+                def strandedness = salmon_strand_analysis.inferred_strandedness == 'undetermined' ? 'unstranded' : salmon_strand_analysis.inferred_strandedness
+                return [ meta + [ strandedness: strandedness, salmon_strand_analysis: salmon_strand_analysis ], reads ]
+        }
+        .mix(ch_strand_fastq.known_strand)
+        .set { ch_strand_inferred_fastq }
+
+    emit:
+
+    reads           = ch_strand_inferred_fastq
+    trim_read_count = ch_trim_read_count
+
+    multiqc_files   = ch_multiqc_files.transpose().map{it[1]}
+    versions        = ch_versions                     // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml
new file mode 100644
index 000000000..cf7cd885f
--- /dev/null
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml
@@ -0,0 +1,132 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fastq_qc_trim_filter_setstrandedness"
+description: Basic FASTQ preprocessing for RNA-seq
+keywords:
+  - fastq
+  - rnaseq
+  - rrna
+  - trimming
+  - subsample
+  - strandedness
+components:
+  - bbmap/bbsplit
+  - samtools/sort
+  - samtools/index
+  - cat
+  - cat/fastq
+  - sortmerna
+  - fastq_subsample_fq_salmon
+  - fastq_fastqc_umitools_trimgalore
+  - fastq_fastqc_umitools_fastp
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - ch_reads:
+      type: file
+      description: |
+        Channel with input FastQ files of size 1 and 2 for single-end and
+        paired-end data, respectively.
+  - ch_fasta:
+      type: file
+      description: Channel with genome sequence in fasta format
+  - ch_transcript_fasta:
+      type: file
+      description: Channel with transcriptome sequence in fasta format
+  - ch_gtf:
+      type: file
+      description: Channel with features in GTF format
+  - ch_salmon_index:
+      type: file
+      description: Directory containing Salmon index
+  - ch_sortmerna_index:
+      type: file
+      description: Directory containing sortmerna index
+  - ch_bbsplit_index:
+      type: file
+      description: Path to directory or tar.gz archive for pre-built BBSplit index
+  - ch_ribo_db:
+      type: file
+      description: |
+        Channel with text file containing paths to fasta files (one per line)
+        that will be used to create the database for SortMeRNA
+  - skip_bbsplit:
+      type: boolean
+      description: Whether to skip BBSplit for removal of non-reference genome reads
+  - skip_fastqc:
+      type: boolean
+      description: Whether to skip FastQC
+  - skip_trimming:
+      type: boolean
+      description: Whether to skip trimming
+  - skip_umi_extract:
+      type: boolean
+      description: |
+        Skip the UMI extraction from the read in case the UMIs have been moved
+        to the headers in advance of the pipeline run
+  - make_salmon_index:
+      type: boolean
+      description: Whether to create salmon index before running salmon quant
+  - make_sortmerna_index:
+      type: boolean
+      description: Whether to create sortmerna index before running sortmerna
+  - trimmer:
+      type: string
+      description: |
+        Specifies the trimming tool to use - available options are 'trimgalore'
+        and 'fastp'
+  - min_trimmed_reads:
+      type: integer
+      description: |
+        Minimum number of trimmed reads below which samples are removed from
+        further processing
+  - save_trimmed:
+      type: boolean
+      description: Save the trimmed FastQ files in the results directory?
+  - remove_ribo_rna:
+      type: boolean
+      description: Enable the removal of reads derived from ribosomal RNA using SortMeRNA?
+  - with_umi:
+      type: boolean
+      description: Enable UMI-based read deduplication
+  - umi_discard_read:
+      type: integer
+      description: |
+        After UMI barcode extraction discard either R1 or R2 by setting this
+        parameter to 1 or 2, respectively
+  - stranded_threshold:
+      type: float
+      min: 0.5
+      description: |
+        The fraction of stranded reads that must be assigned to a strandedness
+        for confident assignment. Must be at least 0.5.
+  - unstranded_threshold:
+      type: float
+      description: |
+        The difference in fraction of stranded reads assigned to 'forward' and
+        'reverse' below which a sample is classified as 'unstranded'.
+
+output:
+  - reads:
+      type: file
+      description: Preprocessed fastq reads
+      pattern: "*.{fq,fastq}{,.gz}"
+  - multiqc_files:
+      type: file
+      description: MultiQC-compatible output files from tools used in preprocessing
+      pattern: "*"
+  - trim_read_count:
+      type: integer
+      description: Number of reads remaining after trimming for all input samples
+  - versions:
+      type: file
+      description: |
+        File containing software versions
+        Structure: [ path(versions.yml) ]
+      pattern: "versions.yml"
+authors:
+  - "@pinin4fjords"
+maintainers:
+  - "@pinin4fjords"
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
new file mode 100644
index 000000000..b58ff098e
--- /dev/null
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
@@ -0,0 +1,135 @@
+nextflow_workflow {
+
+    name "Test Subworkflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS"
+    script "../main.nf"
+    workflow "FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS"
+    config "./nextflow.config"
+
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/fastq_qc_trim_filter_setstrandedness"
+
+    tag "bbmap/bbsplit"
+    tag "cat"
+    tag "cat/fastq"
+    tag "fastqc"
+    tag "sortmerna"
+    tag "subworkflows/fastq_fastqc_umitools_trimgalore"
+    tag "subworkflows/fastq_fastqc_umitools_fastp"
+    tag "subworkflows/fastq_subsample_fq_salmon"
+
+
+
+    test("homo_sapiens paired-end [fastq] fastp") {
+        when {
+            workflow {
+                """
+                ch_reads = Channel.of([
+                    [ id:'test', single_end:false, strandedness:'auto' ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+
+                ch_ribo_db = file('ribo_db.txt')
+                ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta')
+
+                input[0] = ch_reads
+                input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta
+                input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta
+                input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf
+                input[4] = []              // ch_salmon_index
+                input[5] = []              // ch_sortmerna_index
+                input[6] = []              // ch_bbsplit_index
+                input[7] = ch_ribo_db      // ch_ribo_db
+                input[8] = true            // skip_bbsplit
+                input[9] = false           // skip_fastqc
+                input[10] = false          // skip_trimming
+                input[11] = true           // skip_umi_extract
+                input[12] = true           // make_salmon_index
+                input[13] = true           // make_sortmerna_index
+                input[14] = 'fastp'        // trimmer
+                input[15] = 10             // min_trimmed_reads
+                input[16] = true           // save_trimmed
+                input[17] = true           // remove_ribo_rna
+                input[18] = false          // with_umi
+                input[19] = 0              // umi_discard_read
+                input[20] = 0.8            // stranded_threshold
+                input[21] = 0.1            // unstranded_threshold
+                """
+            }
+        }
+        then {
+            def pelines1 = path(workflow.out.reads[0][1][0]).linesGzip
+            def pelines2 = path(workflow.out.reads[0][1][1]).linesGzip
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(pelines1).md5().match("fastp_test_pe_reads_1_lines") },
+                { assert snapshot(pelines1.size()).match("fastp_test_pe_reads_1_size") },
+                { assert snapshot(pelines2).md5().match("fastp_test_pe_reads_2_lines") },
+                { assert snapshot(pelines2.size()).match("fastp_test_pe_reads_2_size") },
+                { assert snapshot(workflow.out.trim_read_count).match("fastp_read_count") }
+                // This doesn't work- 'cat' changes between Conda and Docker -
+                // leaving it here until we find a way to address that
+                // { assert snapshot(workflow.out.versions).match("fastp_versions") }
+            )
+        }
+    }
+    test("homo_sapiens paired-end [fastq] trimgalore") {
+        when {
+            workflow {
+                """
+                ch_reads = Channel.of([
+                    [ id:'test', single_end:false, strandedness:'auto' ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+
+                ch_ribo_db = file('ribo_db.txt')
+                ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta')
+
+                input[0] = ch_reads
+                input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta
+                input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta
+                input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf
+                input[4] = []              // ch_salmon_index
+                input[5] = []              // ch_sortmerna_index
+                input[6] = []              // ch_bbsplit_index
+                input[7] = ch_ribo_db      // ch_ribo_db
+                input[8] = true            // skip_bbsplit
+                input[9] = false           // skip_fastqc
+                input[10] = false          // skip_trimming
+                input[11] = true           // skip_umi_extract
+                input[12] = true           // make_salmon_index
+                input[13] = true           // make_sortmerna_index
+                input[14] = 'trimgalore'   // trimmer
+                input[15] = 10             // min_trimmed_reads
+                input[16] = true           // save_trimmed
+                input[17] = true           // remove_ribo_rna
+                input[18] = false          // with_umi
+                input[19] = 0              // umi_discard_read
+                input[20] = 0.8            // stranded_threshold
+                input[21] = 0.1            // unstranded_threshold
+                """
+            }
+        }
+        then {
+            def pelines1 = path(workflow.out.reads[0][1][0]).linesGzip
+            def pelines2 = path(workflow.out.reads[0][1][1]).linesGzip
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(pelines1).md5().match("trimgalore_test_pe_reads_1_lines") },
+                { assert snapshot(pelines1.size()).match("trimgalore_test_pe_reads_1_size") },
+                { assert snapshot(pelines2).md5().match("trimgalore_test_pe_reads_2_lines") },
+                { assert snapshot(pelines2.size()).match("trimgalore_test_pe_reads_2_size") },
+                { assert snapshot(workflow.out.trim_read_count).match("trimgalore_read_count") }
+                // This doesn't work- 'cat' changes between Conda and Docker -
+                // leaving it here until we find a way to address that
+                //{ assert snapshot(workflow.out.versions).match("trimgalore_versions") }
+            )
+        }
+    }
+}
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap
new file mode 100644
index 000000000..21bdb2492
--- /dev/null
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap
@@ -0,0 +1,112 @@
+{
+    "fastp_read_count": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false,
+                        "strandedness": "auto"
+                    },
+                    3022
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-24T16:19:13.057802"
+    },
+    "trimgalore_test_pe_reads_2_lines": {
+        "content": "eccf3e9e74589ff01c77fce7f4548e41",
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-24T17:44:07.667653"
+    },
+    "fastp_test_pe_reads_1_size": {
+        "content": [
+            4508
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-24T17:43:46.173892"
+    },
+    "trimgalore_test_pe_reads_1_size": {
+        "content": [
+            4508
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-24T17:44:07.642318"
+    },
+    "trimgalore_test_pe_reads_1_lines": {
+        "content": "3868fc1caf09367141d2bbf47e158823",
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-24T17:44:07.641186"
+    },
+    "fastp_test_pe_reads_2_lines": {
+        "content": "eccf3e9e74589ff01c77fce7f4548e41",
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-24T17:43:46.235022"
+    },
+    "fastp_test_pe_reads_2_size": {
+        "content": [
+            4508
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-24T17:43:46.242006"
+    },
+    "trimgalore_test_pe_reads_2_size": {
+        "content": [
+            4508
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-24T17:44:07.668644"
+    },
+    "fastp_test_pe_reads_1_lines": {
+        "content": "3868fc1caf09367141d2bbf47e158823",
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-24T17:43:46.161535"
+    },
+    "trimgalore_read_count": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false,
+                        "strandedness": "auto"
+                    },
+                    3022
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-24T17:44:07.669435"
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/nextflow.config b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/nextflow.config
new file mode 100644
index 000000000..9e33e4b33
--- /dev/null
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/nextflow.config
@@ -0,0 +1,27 @@
+//
+// Read subsampling, strand inference and SortMeRNA options
+//
+
+process {
+    withName: 'FQ_SUBSAMPLE' {
+        ext.args   = '--record-count 1000000 --seed 1'
+        ext.prefix = { "${meta.id}.subsampled" }
+        publishDir = [
+            enabled: false
+        ]
+    }
+
+    withName: '.*:FASTQ_SUBSAMPLE_FQ_SALMON:SALMON_QUANT' {
+        ext.args   = '--skipQuant --minAssignedFrags 1'
+        publishDir = [
+            enabled: false
+        ]
+    }
+    withName: 'SORTMERNA' {
+        ext.args  = '--index 0'
+    }
+    withName: 'SORTMERNA_INDEX' {
+        ext.args  = '--index 1'
+    }
+}
+
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/tags.yml b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/tags.yml
new file mode 100644
index 000000000..cafd4a33d
--- /dev/null
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fastq_qc_trim_filter_setstrandedness:
+  - subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/**
diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf
index 30eac1986..f4075b1c3 100755
--- a/workflows/rnaseq/main.nf
+++ b/workflows/rnaseq/main.nf
@@ -19,7 +19,6 @@ include { ALIGN_STAR    } from '../../subworkflows/local/align_star'
 include { QUANTIFY_RSEM } from '../../subworkflows/local/quantify_rsem'
 include { checkSamplesAfterGrouping      } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
 include { multiqcTsvFromList             } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
-include { getSalmonInferredStrandedness  } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
 include { getStarPercentMapped           } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
 include { biotypeInGtf                   } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
 include { getInferexperimentStrandedness } from '../../subworkflows/local/utils_nfcore_rnaseq_pipeline'
@@ -68,6 +67,7 @@ include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG
 include { BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG as BEDGRAPH_BEDCLIP_BEDGRAPHTOBIGWIG_REVERSE } from '../../subworkflows/nf-core/bedgraph_bedclip_bedgraphtobigwig'
 include { QUANTIFY_PSEUDO_ALIGNMENT as QUANTIFY_STAR_SALMON } from '../../subworkflows/nf-core/quantify_pseudo_alignment'
 include { QUANTIFY_PSEUDO_ALIGNMENT                         } from '../../subworkflows/nf-core/quantify_pseudo_alignment'
+include { FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS              } from '../../subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -98,6 +98,7 @@ workflow RNASEQ {
     ch_salmon_index      // channel: path(salmon/index/)
     ch_kallisto_index    // channel: [ meta, path(kallisto/index/) ]
     ch_bbsplit_index     // channel: path(bbsplit/index/)
+    ch_ribo_db           // channel: path(sortmerna_fasta_list)
     ch_sortmerna_index   // channel: path(sortmerna/index/)
     ch_splicesites       // channel: path(genome.splicesites.txt)
     make_sortmerna_index // boolean: Whether to create an index before running sortmerna
@@ -126,203 +127,40 @@ workflow RNASEQ {
         .map {
             checkSamplesAfterGrouping(it)
         }
-        .branch {
-            meta, fastqs ->
-                single  : fastqs.size() == 1
-                    return [ meta, fastqs.flatten() ]
-                multiple: fastqs.size() > 1
-                    return [ meta, fastqs.flatten() ]
-        }
-        .set { ch_fastq }
-
-    //
-    // MODULE: Concatenate FastQ files from same sample if required
-    //
-    CAT_FASTQ (
-        ch_fastq.multiple
-    )
-    .reads
-    .mix(ch_fastq.single)
-    .set { ch_cat_fastq }
-    ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first())
-
-    //
-    // SUBWORKFLOW: Read QC, extract UMI and trim adapters with TrimGalore!
-    //
-    ch_filtered_reads  = Channel.empty()
-    ch_trim_read_count = Channel.empty()
-    if (params.trimmer == 'trimgalore') {
-        FASTQ_FASTQC_UMITOOLS_TRIMGALORE (
-            ch_cat_fastq,
-            params.skip_fastqc || params.skip_qc,
-            params.with_umi,
-            params.skip_umi_extract,
-            params.skip_trimming,
-            params.umi_discard_read,
-            params.min_trimmed_reads
-        )
-        ch_filtered_reads  = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads
-        ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_read_count
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]}) // FastQC report - untrimmed
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.umi_log.collect{it[1]}) // umi-tools extract logs
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_zip.collect{it[1]})  // FastQC report - trimmed
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.trim_log.collect{it[1]}) // Trim Galore! trimming report
-        ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions)
-    }
+        .set{ ch_fastq }
 
     //
-    // SUBWORKFLOW: Read QC, extract UMI and trim adapters with fastp
+    // Run RNA-seq FASTQ preprocessing subworkflow
     //
-    if (params.trimmer == 'fastp') {
-        FASTQ_FASTQC_UMITOOLS_FASTP (
-            ch_cat_fastq,
-            params.skip_fastqc || params.skip_qc,
-            params.with_umi,
-            params.skip_umi_extract,
-            params.umi_discard_read,
-            params.skip_trimming,
-            [],
-            params.save_trimmed,
-            params.save_trimmed,
-            params.min_trimmed_reads
-        )
-        ch_filtered_reads  = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads
-        ch_trim_read_count = FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_read_count
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_raw_zip.collect{it[1]})
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.fastqc_trim_zip.collect{it[1]})
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.trim_json.collect{it[1]})
-        ch_multiqc_files = ch_multiqc_files.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.umi_log.collect{it[1]}) // umi-tools extract logs
-        ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions)
-    }
-
-    // Save trim status for workflow summary
 
-    ch_trim_status = ch_trim_read_count
-        .map {
-            meta, num_reads ->
-                return [ meta.id, num_reads > params.min_trimmed_reads.toFloat() ]
-        }
-    //
-    // Get list of samples that failed trimming threshold for MultiQC report
-    //
-    ch_trim_read_count
-        .map {
-            meta, num_reads ->
-                if (num_reads <= params.min_trimmed_reads.toFloat()) {
-                    return [ "$meta.id\t$num_reads" ]
-                }
-        }
-        .collect()
-        .map {
-            tsv_data ->
-                def header = ["Sample", "Reads after trimming"]
-                multiqcTsvFromList(tsv_data, header)
-        }
-        .set { ch_fail_trimming_multiqc }
-    ch_multiqc_files = ch_multiqc_files.mix(ch_fail_trimming_multiqc.collectFile(name: 'fail_trimmed_samples_mqc.tsv'))
-
-    //
-    // MODULE: Remove genome contaminant reads
-    //
-    if (!params.skip_bbsplit) {
-        BBMAP_BBSPLIT (
-            ch_filtered_reads,
-            ch_bbsplit_index,
-            [],
-            [ [], [] ],
-            false
-        )
-        .primary_fastq
-        .set { ch_filtered_reads }
-        ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions.first())
-    }
-
-    //
-    // MODULE: Remove ribosomal RNA reads
-    //
-    // Check rRNA databases for sortmerna
-    if (params.remove_ribo_rna) {
-        ch_ribo_db = file(params.ribo_database_manifest)
-        if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"}
-
-        Channel.from(ch_ribo_db.readLines())
-            .map { row -> file(row, checkIfExists: true) }
-            .collect()
-            .map { [ 'rrna_refs', it ] }
-            .set { ch_sortmerna_fastas }
-
-        if (make_sortmerna_index) {
-            SORTMERNA_INDEX (
-                [ [],[] ],
-                ch_sortmerna_fastas,
-                [ [],[] ]
-            )
-            ch_sortmerna_index = SORTMERNA_INDEX.out.index.first()
-        }
-
-        SORTMERNA (
-            ch_filtered_reads,
-            ch_sortmerna_fastas,
-            ch_sortmerna_index
-        )
-        .reads
-        .set { ch_filtered_reads }
-
-        ch_multiqc_files = ch_multiqc_files.mix(SORTMERNA.out.log.collect{it[1]})
-        ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
-    }
-
-    //
-    // SUBWORKFLOW: Sub-sample FastQ files and pseudoalign with Salmon to auto-infer strandedness
-    //
-
-    // Branch FastQ channels if 'auto' specified to infer strandedness
-    ch_filtered_reads
-        .branch {
-            meta, fastq ->
-                auto_strand : meta.strandedness == 'auto'
-                    return [ meta, fastq ]
-                known_strand: meta.strandedness != 'auto'
-                    return [ meta, fastq ]
-        }
-        .set { ch_strand_fastq }
-
-    // Return empty channel if ch_strand_fastq.auto_strand is empty so salmon index isn't created
-    ch_fasta
-        .combine(ch_strand_fastq.auto_strand)
-        .map { it.first() }
-        .first()
-        .set { ch_genome_fasta }
-
-    def prepare_tool_indices = []
-    if (!params.skip_pseudo_alignment && params.pseudo_aligner) {
-        prepare_tool_indices << params.pseudo_aligner
-    }
-    FASTQ_SUBSAMPLE_FQ_SALMON (
-        ch_strand_fastq.auto_strand,
-        ch_genome_fasta,
+    FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS (
+        ch_fastq,
+        ch_fasta,
         ch_transcript_fasta,
         ch_gtf,
         ch_salmon_index,
-        !params.salmon_index && !('salmon' in prepare_tool_indices)
+        ch_sortmerna_index,
+        ch_bbsplit_index,
+        ch_ribo_db,
+        params.skip_bbsplit,
+        params.skip_fastqc || params.skip_qc,
+        params.skip_trimming,
+        params.skip_umi_extract,
+        !params.salmon_index && params.pseudo_aligner == 'salmon' && !params.skip_pseudo_alignment,
+        !params.sortmerna_index && params.remove_ribo_rna,
+        params.trimmer,
+        params.min_trimmed_reads,
+        params.save_trimmed,
+        params.remove_ribo_rna,
+        params.with_umi,
+        params.umi_discard_read,
+        params.stranded_threshold,
+        params.unstranded_threshold
     )
-    ch_versions = ch_versions.mix(FASTQ_SUBSAMPLE_FQ_SALMON.out.versions)
 
-    FASTQ_SUBSAMPLE_FQ_SALMON
-        .out
-        .lib_format_counts
-        .join(ch_strand_fastq.auto_strand)
-        .map {
-            meta, json, reads ->
-                def salmon_strand_analysis = getSalmonInferredStrandedness(json, stranded_threshold=params.stranded_threshold, unstranded_threshold=params.unstranded_threshold)
-                strandedness = salmon_strand_analysis.inferred_strandedness
-                if (strandedness == 'undetermined') {
-                    strandedness = 'unstranded'
-                }
-                return [ meta + [ strandedness: strandedness, salmon_strand_analysis: salmon_strand_analysis ], reads ]
-        }
-        .mix(ch_strand_fastq.known_strand)
-        .set { ch_strand_inferred_filtered_fastq }
+    ch_multiqc_files                  = ch_multiqc_files.mix(FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS.out.multiqc_files)
+    ch_versions                       = ch_versions.mix(FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS.out.versions)
+    ch_strand_inferred_filtered_fastq = FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS.out.reads
 
     //
     // SUBWORKFLOW: Alignment with STAR and gene/transcript quantification with Salmon
@@ -870,11 +708,14 @@ workflow RNASEQ {
         // Provide MultiQC with rename patterns to ensure it uses sample names
         // for single-techrep samples not processed by CAT_FASTQ.
 
-        ch_name_replacements = ch_fastq.single
+        ch_name_replacements = ch_fastq
+            .filter{ meta, reads ->
+                reads.size() == 1
+            }
             .map{ meta, reads ->
-                def name1 = file(reads[0]).simpleName + "\t" + meta.id + '_1'
-                if (reads[1] ){
-                    def name2 = file(reads[1]).simpleName + "\t" + meta.id + '_2'
+                def name1 = file(reads[0][0]).simpleName + "\t" + meta.id + '_1'
+                if (reads[0][1] ){
+                    def name2 = file(reads[0][1]).simpleName + "\t" + meta.id + '_2'
                     return [ name1, name2 ]
                 } else{
                     return name1

From 2389882790283afd7fdc2b3cf9fd2b53ea5d5b31 Mon Sep 17 00:00:00 2001
From: Jonathan Manning <pininforthefjords@gmail.com>
Date: Tue, 16 Jul 2024 11:09:37 +0000
Subject: [PATCH 02/11] update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b1e40c3cb..ac293d86a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -104,6 +104,7 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements
 - [PR #1336](https://github.com/nf-core/rnaseq/pull/1334) - Use nf-core/setup-nf-test to install nf-test from cache during CI/CD
 - [PR #1340](https://github.com/nf-core/rnaseq/pull/1340) - Remove out-of-date Azure specific guidance
 - [PR #1341](https://github.com/nf-core/rnaseq/pull/1341) - Add rename in the MultiQC report for samples without techreps
+- [PR #1342](https://github.com/nf-core/rnaseq/pull/1342) - Factor out preprocessing
 
 ### Parameters
 

From 4d308443227a6431e118456c2700a3f20bdab2df Mon Sep 17 00:00:00 2001
From: Jonathan Manning <jonathan.manning@seqera.io>
Date: Wed, 17 Jul 2024 09:16:57 +0100
Subject: [PATCH 03/11] Update RNAseq preprocessing swf

---
 .../nf-core/fastq_qc_trim_filter_setstrandedness/main.nf    | 6 +++---
 .../fastq_qc_trim_filter_setstrandedness/tests/main.nf.test | 4 ++++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
index a17fc0b2b..ac8217ee9 100644
--- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
@@ -104,7 +104,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
     remove_ribo_rna      // boolean: true/false: whether to run sortmerna to remove rrnas
     with_umi             // boolean: true/false: Enable UMI-based read deduplication.
     umi_discard_read     // integer: 0, 1 or 2
-    stranded_threshold   // float: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. Must be at least 0.5 
+    stranded_threshold   // float: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. Must be at least 0.5
     unstranded_threshold // float: The difference in fraction of stranded reads assigned to 'forward' and 'reverse' below which a sample is classified as 'unstranded'
 
     main:
@@ -296,8 +296,8 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
 
     FASTQ_SUBSAMPLE_FQ_SALMON
         .out
-        .lib_format_counts               
-        .join(ch_strand_fastq.auto_strand) 
+        .lib_format_counts
+        .join(ch_strand_fastq.auto_strand)
         .map {
             meta, json, reads ->
                 def salmon_strand_analysis = getSalmonInferredStrandedness(json, stranded_threshold=stranded_threshold, unstranded_threshold=unstranded_threshold)
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
index b58ff098e..ad5eef725 100644
--- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
@@ -56,6 +56,8 @@ nextflow_workflow {
                 input[17] = true           // remove_ribo_rna
                 input[18] = false          // with_umi
                 input[19] = 0              // umi_discard_read
+                input[20] = 0.8            // stranded_threshold
+                input[21] = 0.1            // unstranded_threshold
                 """
             }
         }
@@ -112,6 +114,8 @@ nextflow_workflow {
                 input[17] = true           // remove_ribo_rna
                 input[18] = false          // with_umi
                 input[19] = 0              // umi_discard_read
+                input[20] = 0.8            // stranded_threshold
+                input[21] = 0.1            // unstranded_threshold
                 """
             }
         }

From 9e8f1e0b4d3a98b0e07e48377f363beeb689e8fc Mon Sep 17 00:00:00 2001
From: Jonathan Manning <pininforthefjords@gmail.com>
Date: Wed, 17 Jul 2024 08:26:58 +0000
Subject: [PATCH 04/11] update modules.json

---
 modules.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules.json b/modules.json
index 346597902..860d54c94 100644
--- a/modules.json
+++ b/modules.json
@@ -324,7 +324,7 @@
                     },
                     "fastq_qc_trim_filter_setstrandedness": {
                         "branch": "master",
-                        "git_sha": "81ed776ec05bf9ae89da3442c29b5c5c7afa807b",
+                        "git_sha": "0a8d33545937d7ab061d0fd2499dc23891ddb6e0",
                         "installed_by": ["subworkflows"]
                     },
                     "fastq_subsample_fq_salmon": {

From 6a22103bdd6615ebf53c661e20b7b3f7166e6d3f Mon Sep 17 00:00:00 2001
From: Jonathan Manning <pininforthefjords@gmail.com>
Date: Wed, 17 Jul 2024 09:20:38 +0000
Subject: [PATCH 05/11] fix up ribo db wiring

---
 main.nf                                              |  2 +-
 subworkflows/local/prepare_genome/main.nf            | 12 +++++-------
 .../fastq_qc_trim_filter_setstrandedness/main.nf     |  5 ++---
 .../fastq_qc_trim_filter_setstrandedness/meta.yml    |  6 +++---
 4 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/main.nf b/main.nf
index 152c320e4..a0a6f1c7d 100755
--- a/main.nf
+++ b/main.nf
@@ -117,7 +117,7 @@ workflow NFCORE_RNASEQ {
         PREPARE_GENOME.out.salmon_index,
         PREPARE_GENOME.out.kallisto_index,
         PREPARE_GENOME.out.bbsplit_index,
-        PREPARE_GENOME.out.ribo_db,
+        PREPARE_GENOME.out.rrna_fastas,
         PREPARE_GENOME.out.sortmerna_index,
         PREPARE_GENOME.out.splicesites,
         !params.remove_ribo_rna && params.remove_ribo_rna
diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf
index 050395442..92f8cc04a 100644
--- a/subworkflows/local/prepare_genome/main.nf
+++ b/subworkflows/local/prepare_genome/main.nf
@@ -228,10 +228,10 @@ workflow PREPARE_GENOME {
     // Uncompress sortmerna index or generate from scratch if required
     //
     ch_sortmerna_index = Channel.empty()
-    ch_ribo_db = Channel.empty()
+    ch_rrna_fastas = Channel.empty()
 
     if ('sortmerna' in prepare_tool_indices) {
-        ch_ribo_db = file(sortmerna_fasta_list)
+        ribo_db = file(sortmerna_fasta_list)
 
         if (sortmerna_index) {
             if (sortmerna_index.endsWith('.tar.gz')) {
@@ -241,14 +241,12 @@ workflow PREPARE_GENOME {
                 ch_sortmerna_index = Channel.value(file(sortmerna_index))
             }
         } else {
-            ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
+            ch_rrna_fastas = Channel.from(ribo_db.readLines())
                 .map { row -> file(row, checkIfExists: true) }
-                .collect()
-                .map { [ 'rrna_refs', it ] }
 
             SORTMERNA_INDEX (
                 Channel.of([ [],[] ]),
-                ch_sortmerna_fastas,
+                ch_rrna_fastas.collect().map { [ 'rrna_refs', it ] },
                 Channel.of([ [],[] ])
             )
             ch_sortmerna_index = SORTMERNA_INDEX.out.index.first()
@@ -374,7 +372,7 @@ workflow PREPARE_GENOME {
     chrom_sizes      = ch_chrom_sizes            // channel: path(genome.sizes)
     splicesites      = ch_splicesites            // channel: path(genome.splicesites.txt)
     bbsplit_index    = ch_bbsplit_index          // channel: path(bbsplit/index/)
-    ribo_db          = ch_ribo_db                // channel: path(sortmerna_fasta_list)
+    rrna_fastas      = ch_rrna_fastas            // channel: path(rrna_fasta), one item per rRNA FASTA
     sortmerna_index  = ch_sortmerna_index        // channel: path(sortmerna/index/)
     star_index       = ch_star_index             // channel: path(star/index/)
     rsem_index       = ch_rsem_index             // channel: path(rsem/index/)
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
index ac8217ee9..0651753e1 100644
--- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
@@ -91,7 +91,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
     ch_salmon_index      // channel: /path/to/salmon/index/ (optional)
     ch_sortmerna_index   // channel: /path/to/sortmerna/index/ (optional)
     ch_bbsplit_index     // channel: /path/to/bbsplit/index/ (optional)
-    ch_ribo_db           // channel: /path/to/ Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. (optional)
+    ch_rrna_fastas       // channel: one or more fasta files containing rrna sequencs passed to SortMeRNA (optional)
     skip_bbsplit         // boolean: Skip BBSplit for removal of non-reference genome reads.
     skip_fastqc          // boolean: true/false
     skip_trimming        // boolean: true/false
@@ -233,8 +233,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
     // MODULE: Remove ribosomal RNA reads
     //
     if (remove_ribo_rna) {
-        ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
-            .map { row -> file(row, checkIfExists: true) }
+        ch_sortmerna_fastas = ch_rrna_fastas
             .collect()
             .map{ ['rrna_refs', it] }
 
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml
index cf7cd885f..6f92f56a0 100644
--- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml
@@ -47,11 +47,11 @@ input:
   - ch_bbsplit_index:
       type: file
       description: Path to directory or tar.gz archive for pre-built BBSplit index
-  - ch_ribo_db:
+  - ch_rrna_fastas:
       type: file
       description: |
-        Channel with text file containing paths to fasta files (one per line)
-        that will be used to create the database for SortMeRNA
+        Channel containing one or more FASTA files containing rRNA sequences
+        for use with SortMeRNA
   - skip_bbsplit:
       type: boolean
       description: Whether to skip BBSplit for removal of non-reference genome reads

From 2447c6b76771860ed00900419f2e5bf4fa841a51 Mon Sep 17 00:00:00 2001
From: Jonathan Manning <pininforthefjords@gmail.com>
Date: Wed, 17 Jul 2024 09:26:48 +0000
Subject: [PATCH 06/11] Install swf from branch for now

---
 modules.json                                                  | 4 ++--
 .../nf-core/fastq_qc_trim_filter_setstrandedness/main.nf      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules.json b/modules.json
index 860d54c94..310fb392e 100644
--- a/modules.json
+++ b/modules.json
@@ -323,8 +323,8 @@
                         "installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"]
                     },
                     "fastq_qc_trim_filter_setstrandedness": {
-                        "branch": "master",
-                        "git_sha": "0a8d33545937d7ab061d0fd2499dc23891ddb6e0",
+                        "branch": "rrna_prerpo_rrna_should_be_channel",
+                        "git_sha": "d1e4c1977912284d6c4c1a1b351968cb6fcf1b6d",
                         "installed_by": ["subworkflows"]
                     },
                     "fastq_subsample_fq_salmon": {
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
index 0651753e1..cd69166f3 100644
--- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
@@ -91,7 +91,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
     ch_salmon_index      // channel: /path/to/salmon/index/ (optional)
     ch_sortmerna_index   // channel: /path/to/sortmerna/index/ (optional)
     ch_bbsplit_index     // channel: /path/to/bbsplit/index/ (optional)
-    ch_rrna_fastas       // channel: one or more fasta files containing rrna sequencs passed to SortMeRNA (optional)
+    ch_rrna_fastas       // channel: one or more fasta files containing rrna sequences to be passed to SortMeRNA (optional)
     skip_bbsplit         // boolean: Skip BBSplit for removal of non-reference genome reads.
     skip_fastqc          // boolean: true/false
     skip_trimming        // boolean: true/false

From 3da0bc3ec5c0c6259dc1537230d24b070351366b Mon Sep 17 00:00:00 2001
From: Jonathan Manning <pininforthefjords@gmail.com>
Date: Wed, 17 Jul 2024 09:43:04 +0000
Subject: [PATCH 07/11] Fix tests

---
 .../tests/main.nf.test                                 | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
index ad5eef725..038af9e30 100644
--- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
@@ -33,9 +33,6 @@ nextflow_workflow {
                     ]
                 ])
 
-                ch_ribo_db = file('ribo_db.txt')
-                ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta')
-
                 input[0] = ch_reads
                 input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta
                 input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta
@@ -43,7 +40,7 @@ nextflow_workflow {
                 input[4] = []              // ch_salmon_index
                 input[5] = []              // ch_sortmerna_index
                 input[6] = []              // ch_bbsplit_index
-                input[7] = ch_ribo_db      // ch_ribo_db
+                input[7] = Channel.of(file('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta', checkIfExists: true)) // ch_rrna_fastas 
                 input[8] = true            // skip_bbsplit
                 input[9] = false           // skip_fastqc
                 input[10] = false           // skip_trimming
@@ -91,9 +88,6 @@ nextflow_workflow {
                     ]
                 ])
 
-                ch_ribo_db = file('ribo_db.txt')
-                ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta')
-
                 input[0] = ch_reads
                 input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta
                 input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta
@@ -101,7 +95,7 @@ nextflow_workflow {
                 input[4] = []              // ch_salmon_index
                 input[5] = []              // ch_sortmerna_index
                 input[6] = []              // ch_bbsplit_index
-                input[7] = ch_ribo_db      // ch_ribo_db
+                input[7] = Channel.of(file('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta', checkIfExists: true)) // ch_rrna_fastas 
                 input[8] = true            // skip_bbsplit
                 input[9] = false           // skip_fastqc
                 input[10] = false           // skip_trimming

From 259dccfed00a19a18911739ec2031ea2b502ccea Mon Sep 17 00:00:00 2001
From: Jonathan Manning <pininforthefjords@gmail.com>
Date: Wed, 17 Jul 2024 09:45:10 +0000
Subject: [PATCH 08/11] Bump fastq_qc_trim_filter_setstrandedness SHA in
 modules.json

---
 modules.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules.json b/modules.json
index 310fb392e..f16d62bbf 100644
--- a/modules.json
+++ b/modules.json
@@ -324,7 +324,7 @@
                     },
                     "fastq_qc_trim_filter_setstrandedness": {
                         "branch": "rrna_prerpo_rrna_should_be_channel",
-                        "git_sha": "d1e4c1977912284d6c4c1a1b351968cb6fcf1b6d",
+                        "git_sha": "d41517e03cd875b9b469a0d27e31c7afd9d2250f",
                         "installed_by": ["subworkflows"]
                     },
                     "fastq_subsample_fq_salmon": {

From fe950ba0b07aa2103ae250bfd24afb64104ebcbf Mon Sep 17 00:00:00 2001
From: Jonathan Manning <pininforthefjords@gmail.com>
Date: Wed, 17 Jul 2024 10:32:50 +0000
Subject: [PATCH 09/11] Move strandedness function testing to subworkflow

---
 modules.json                                  |   2 +-
 .../tests/main.function.nf.test               | 109 ----------------
 .../tests/main.function.nf.test.snap          |  45 -------
 .../tests/main.function.nf.test               | 118 ++++++++++++++++++
 .../tests/main.function.nf.test.snap          |  47 +++++++
 .../tests/main.nf.test                        |   2 +
 .../tests/main.nf.test.snap                   |  40 +++---
 7 files changed, 188 insertions(+), 175 deletions(-)
 create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test
 create mode 100644 subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test.snap

diff --git a/modules.json b/modules.json
index f16d62bbf..9d8b94799 100644
--- a/modules.json
+++ b/modules.json
@@ -324,7 +324,7 @@
                     },
                     "fastq_qc_trim_filter_setstrandedness": {
                         "branch": "rrna_prerpo_rrna_should_be_channel",
-                        "git_sha": "d41517e03cd875b9b469a0d27e31c7afd9d2250f",
+                        "git_sha": "e9fde2d1f5d38e2ed61c217aef44e23e8ed07103",
                         "installed_by": ["subworkflows"]
                     },
                     "fastq_subsample_fq_salmon": {
diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test b/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test
index c809b2117..8390034c5 100644
--- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test
+++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test
@@ -390,115 +390,6 @@ nextflow_function {
 
     }
 
-    test("Test Function getSalmonInferredStrandedness unstranded") {
-
-        function "getSalmonInferredStrandedness"
-
-        when {
-            function {
-                """
-                import groovy.json.JsonOutput
-
-                // Define the JSON contents for the test
-                def json_contents = JsonOutput.toJson([
-                    "SF": 0,
-                    "SR": 0,
-                    "ISF": 100,
-                    "ISR": 100,
-                    "IU": 0,
-                    "U": 0
-                ])
-                def jsonFile = file("${workDir}/salmonUnstranded.json")
-                jsonFile.write(json_contents)
-
-                input[0] = jsonFile
-                input[1] = 0.8
-                input[2] = 0.1
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert function.success },
-                { assert snapshot(function.result).match() }
-            )
-        }
-
-    }
-
-    test("Test Function getSalmonInferredStrandedness forward") {
-
-        function "getSalmonInferredStrandedness"
-
-        when {
-            function {
-                """
-                import groovy.json.JsonOutput
-
-                def json_contents = JsonOutput.toJson([
-                    "SF": 0,
-                    "SR": 0,
-                    "ISF": 100,
-                    "ISR": 0,
-                    "IU": 0,
-                    "U": 0
-                ])
-                def jsonFile = file("${workDir}/salmonForward.json")
-                jsonFile.write(json_contents)
-
-                input[0] = jsonFile
-                input[1] = 0.8
-                input[2] = 0.1
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert function.success },
-                { assert snapshot(function.result).match() }
-            )
-        }
-
-    }
-
-    test("Test Function getSalmonInferredStrandedness reverse") {
-
-        function "getSalmonInferredStrandedness"
-
-        when {
-            function {
-                """
-                import groovy.json.JsonOutput
-
-                def json_contents = JsonOutput.toJson([
-                    "SF": 0,
-                    "SR": 0,
-                    "ISF": 0,
-                    "ISR": 100,
-                    "IU": 0,
-                    "U": 0
-                ])
-                def jsonFile = file("${workDir}/salmonReverse.json")
-                jsonFile.write(json_contents)
-
-                input[0] = jsonFile
-                input[1] = 0.8
-                input[2] = 0.1
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert function.success },
-                { assert snapshot(function.result).match() }
-            )
-        }
-
-    }
-
     test("Test Function getStarPercentMapped pass") {
 
         function "getStarPercentMapped"
diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test.snap b/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test.snap
index dd632fbc3..ed2d57860 100644
--- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test.snap
+++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/tests/main.function.nf.test.snap
@@ -45,36 +45,6 @@
         },
         "timestamp": "2024-03-06T14:33:26.903306"
     },
-    "Test Function getSalmonInferredStrandedness unstranded": {
-        "content": [
-            {
-                "inferred_strandedness": "unstranded",
-                "forwardFragments": 50.0,
-                "reverseFragments": 50.0,
-                "unstrandedFragments": 0.0
-            }
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-06-18T14:29:54.96715"
-    },
-    "Test Function getSalmonInferredStrandedness reverse": {
-        "content": [
-            {
-                "inferred_strandedness": "reverse",
-                "forwardFragments": 0.0,
-                "reverseFragments": 100.0,
-                "unstrandedFragments": 0.0
-            }
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-06-18T14:30:11.417381"
-    },
     "Test Function checkSamplesAfterGrouping invalid strandedness": {
         "content": null,
         "meta": {
@@ -215,21 +185,6 @@
         },
         "timestamp": "2024-03-06T14:32:49.565504"
     },
-    "Test Function getSalmonInferredStrandedness forward": {
-        "content": [
-            {
-                "inferred_strandedness": "forward",
-                "forwardFragments": 100.0,
-                "reverseFragments": 0.0,
-                "unstrandedFragments": 0.0
-            }
-        ],
-        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
-        },
-        "timestamp": "2024-06-18T14:30:03.301262"
-    },
     "Test Function rsemStarIndexWarn": {
         "content": null,
         "meta": {
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test
new file mode 100644
index 000000000..57fcd302b
--- /dev/null
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test
@@ -0,0 +1,118 @@
+nextflow_function {
+
+    name "Test Functions"
+    script "../main.nf"
+    
+    //
+    // Test function for deriving strandedness from Salmon numbers
+    //
+
+    test("Test Function getSalmonInferredStrandedness unstranded") {
+
+        function "getSalmonInferredStrandedness"
+
+        when {
+            function {
+                """
+                import groovy.json.JsonOutput
+
+                // Define the JSON contents for the test
+                def json_contents = JsonOutput.toJson([
+                    "SF": 0,
+                    "SR": 0,
+                    "ISF": 100,
+                    "ISR": 100,
+                    "IU": 0,
+                    "U": 0
+                ])
+                def jsonFile = file("${workDir}/salmonUnstranded.json")
+                jsonFile.write(json_contents)
+
+                input[0] = jsonFile
+                input[1] = 0.8
+                input[2] = 0.1
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+
+    }
+
+    test("Test Function getSalmonInferredStrandedness forward") {
+
+        function "getSalmonInferredStrandedness"
+
+        when {
+            function {
+                """
+                import groovy.json.JsonOutput
+
+                def json_contents = JsonOutput.toJson([
+                    "SF": 0,
+                    "SR": 0,
+                    "ISF": 100,
+                    "ISR": 0,
+                    "IU": 0,
+                    "U": 0
+                ])
+                def jsonFile = file("${workDir}/salmonForward.json")
+                jsonFile.write(json_contents)
+
+                input[0] = jsonFile
+                input[1] = 0.8
+                input[2] = 0.1
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+
+    }
+
+    test("Test Function getSalmonInferredStrandedness reverse") {
+
+        function "getSalmonInferredStrandedness"
+
+        when {
+            function {
+                """
+                import groovy.json.JsonOutput
+
+                def json_contents = JsonOutput.toJson([
+                    "SF": 0,
+                    "SR": 0,
+                    "ISF": 0,
+                    "ISR": 100,
+                    "IU": 0,
+                    "U": 0
+                ])
+                def jsonFile = file("${workDir}/salmonReverse.json")
+                jsonFile.write(json_contents)
+
+                input[0] = jsonFile
+                input[1] = 0.8
+                input[2] = 0.1
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+
+    }
+}
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test.snap b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test.snap
new file mode 100644
index 000000000..4b4e2e874
--- /dev/null
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.function.nf.test.snap
@@ -0,0 +1,47 @@
+{
+    "Test Function getSalmonInferredStrandedness forward": {
+        "content": [
+            {
+                "inferred_strandedness": "forward",
+                "forwardFragments": 100.0,
+                "reverseFragments": 0.0,
+                "unstrandedFragments": 0.0
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-17T10:28:59.284820592"
+    },
+    "Test Function getSalmonInferredStrandedness unstranded": {
+        "content": [
+            {
+                "inferred_strandedness": "unstranded",
+                "forwardFragments": 50.0,
+                "reverseFragments": 50.0,
+                "unstrandedFragments": 0.0
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-17T10:28:47.514036696"
+    },
+    "Test Function getSalmonInferredStrandedness reverse": {
+        "content": [
+            {
+                "inferred_strandedness": "reverse",
+                "forwardFragments": 0.0,
+                "reverseFragments": 100.0,
+                "unstrandedFragments": 0.0
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-17T10:29:11.43961965"
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
index 038af9e30..5242f2bee 100644
--- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test
@@ -130,4 +130,6 @@ nextflow_workflow {
             )
         }
     }
+    
+
 }
diff --git a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap
index 21bdb2492..985349317 100644
--- a/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap
+++ b/subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/tests/main.nf.test.snap
@@ -14,17 +14,17 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
+            "nextflow": "24.04.3"
         },
-        "timestamp": "2024-02-24T16:19:13.057802"
+        "timestamp": "2024-07-17T10:24:00.044553245"
     },
     "trimgalore_test_pe_reads_2_lines": {
         "content": "eccf3e9e74589ff01c77fce7f4548e41",
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
+            "nextflow": "24.04.3"
         },
-        "timestamp": "2024-02-24T17:44:07.667653"
+        "timestamp": "2024-07-17T10:24:26.838793051"
     },
     "fastp_test_pe_reads_1_size": {
         "content": [
@@ -32,9 +32,9 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
+            "nextflow": "24.04.3"
         },
-        "timestamp": "2024-02-24T17:43:46.173892"
+        "timestamp": "2024-07-17T10:23:59.889337984"
     },
     "trimgalore_test_pe_reads_1_size": {
         "content": [
@@ -42,25 +42,25 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
+            "nextflow": "24.04.3"
         },
-        "timestamp": "2024-02-24T17:44:07.642318"
+        "timestamp": "2024-07-17T10:24:26.778599725"
     },
     "trimgalore_test_pe_reads_1_lines": {
         "content": "3868fc1caf09367141d2bbf47e158823",
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
+            "nextflow": "24.04.3"
         },
-        "timestamp": "2024-02-24T17:44:07.641186"
+        "timestamp": "2024-07-17T10:24:26.774975135"
     },
     "fastp_test_pe_reads_2_lines": {
         "content": "eccf3e9e74589ff01c77fce7f4548e41",
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
+            "nextflow": "24.04.3"
         },
-        "timestamp": "2024-02-24T17:43:46.235022"
+        "timestamp": "2024-07-17T10:23:59.997625278"
     },
     "fastp_test_pe_reads_2_size": {
         "content": [
@@ -68,9 +68,9 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
+            "nextflow": "24.04.3"
         },
-        "timestamp": "2024-02-24T17:43:46.242006"
+        "timestamp": "2024-07-17T10:24:00.042449965"
     },
     "trimgalore_test_pe_reads_2_size": {
         "content": [
@@ -78,17 +78,17 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
+            "nextflow": "24.04.3"
         },
-        "timestamp": "2024-02-24T17:44:07.668644"
+        "timestamp": "2024-07-17T10:24:26.841434261"
     },
     "fastp_test_pe_reads_1_lines": {
         "content": "3868fc1caf09367141d2bbf47e158823",
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
+            "nextflow": "24.04.3"
         },
-        "timestamp": "2024-02-24T17:43:46.161535"
+        "timestamp": "2024-07-17T10:23:59.882844295"
     },
     "trimgalore_read_count": {
         "content": [
@@ -105,8 +105,8 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "23.10.1"
+            "nextflow": "24.04.3"
         },
-        "timestamp": "2024-02-24T17:44:07.669435"
+        "timestamp": "2024-07-17T10:24:26.84402498"
     }
 }
\ No newline at end of file

From 6bb1a791ad4ec46f563686957a99682da79db06e Mon Sep 17 00:00:00 2001
From: Jonathan Manning <jonathan.manning@seqera.io>
Date: Wed, 17 Jul 2024 12:12:14 +0100
Subject: [PATCH 10/11] Install from modules master

---
 modules.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules.json b/modules.json
index 9d8b94799..8d89cfcbb 100644
--- a/modules.json
+++ b/modules.json
@@ -323,8 +323,8 @@
                         "installed_by": ["fastq_qc_trim_filter_setstrandedness", "subworkflows"]
                     },
                     "fastq_qc_trim_filter_setstrandedness": {
-                        "branch": "rrna_prerpo_rrna_should_be_channel",
-                        "git_sha": "e9fde2d1f5d38e2ed61c217aef44e23e8ed07103",
+                        "branch": "master",
+                        "git_sha": "b86de50ab60c19ab40e70a4501820f4cb307050b",
                         "installed_by": ["subworkflows"]
                     },
                     "fastq_subsample_fq_salmon": {

From 1049fa57dd5cb93efb48f4a3d4733d44b54812fb Mon Sep 17 00:00:00 2001
From: Jonathan Manning <pininforthefjords@gmail.com>
Date: Wed, 17 Jul 2024 13:16:21 +0000
Subject: [PATCH 11/11] Strip preprocessing components relocated to
 subworkflows

---
 workflows/rnaseq/main.nf | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf
index f4075b1c3..896159378 100755
--- a/workflows/rnaseq/main.nf
+++ b/workflows/rnaseq/main.nf
@@ -32,8 +32,6 @@ include { getInferexperimentStrandedness } from '../../subworkflows/local/utils_
 //
 // MODULE: Installed directly from nf-core/modules
 //
-include { CAT_FASTQ               } from '../../modules/nf-core/cat/fastq'
-include { BBMAP_BBSPLIT           } from '../../modules/nf-core/bbmap/bbsplit'
 include { DUPRADAR                } from '../../modules/nf-core/dupradar'
 include { SAMTOOLS_SORT           } from '../../modules/nf-core/samtools/sort'
 include { PRESEQ_LCEXTRAP         } from '../../modules/nf-core/preseq/lcextrap'
@@ -42,8 +40,6 @@ include { STRINGTIE_STRINGTIE     } from '../../modules/nf-core/stringtie/string
 include { SUBREAD_FEATURECOUNTS   } from '../../modules/nf-core/subread/featurecounts'
 include { MULTIQC                 } from '../../modules/nf-core/multiqc'
 include { UMITOOLS_PREPAREFORRSEM as UMITOOLS_PREPAREFORSALMON } from '../../modules/nf-core/umitools/prepareforrsem'
-include { SORTMERNA                                            } from '../../modules/nf-core/sortmerna'
-include { SORTMERNA as SORTMERNA_INDEX                         } from '../../modules/nf-core/sortmerna'
 include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_FW          } from '../../modules/nf-core/bedtools/genomecov'
 include { BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_REV         } from '../../modules/nf-core/bedtools/genomecov'
 
@@ -54,9 +50,6 @@ include { paramsSummaryMap                 } from 'plugin/nf-validation'
 include { fromSamplesheet                  } from 'plugin/nf-validation'
 include { paramsSummaryMultiqc             } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
 include { softwareVersionsToYAML           } from '../../subworkflows/nf-core/utils_nfcore_pipeline'
-include { FASTQ_SUBSAMPLE_FQ_SALMON        } from '../../subworkflows/nf-core/fastq_subsample_fq_salmon'
-include { FASTQ_FASTQC_UMITOOLS_TRIMGALORE } from '../../subworkflows/nf-core/fastq_fastqc_umitools_trimgalore'
-include { FASTQ_FASTQC_UMITOOLS_FASTP      } from '../../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
 include { FASTQ_ALIGN_HISAT2               } from '../../subworkflows/nf-core/fastq_align_hisat2'
 include { BAM_SORT_STATS_SAMTOOLS          } from '../../subworkflows/nf-core/bam_sort_stats_samtools'
 include { BAM_MARKDUPLICATES_PICARD        } from '../../subworkflows/nf-core/bam_markduplicates_picard'