Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bulk rnaseq multiqc per sample #67

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
259 changes: 146 additions & 113 deletions bulk_RNASeq/bulk_rna_seq.nf
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@ params.dbsnp = ""
params.dbsnp_tbi = ""
params.gene_mapper = ""
params.contig_format_map = ""
params.call_snps = ""
params.format_contigs = ""
params.tmp_dir = ""
params.results_directory = ""
@@ -55,13 +56,17 @@ include { BCFTOOLS_CONTIG_CONVERSION} from './modules/bcftools_contig_conversion
include { BCFTOOLS_SORT_VCF } from './modules/bcftools_sort_vcf'
include { BCFTOOLS_INDEX_VCF } from './modules/bcftools_index_vcf'
include { BCFTOOLS_MERGE_VCF } from './modules/bcftools_merge_vcf'
include { MULTIQC_PER_SAMPLE } from './modules/multiqc_per_sample'
include { MULTIQC } from './modules/multiqc'



workflow {
// To gather all QC reports for MultiQC
ch_reports = Channel.empty()
// To gather all QC reports for MultiQC_PER_SAMPLE
ch_reports_per_sample = Channel.empty()
logs = Channel.empty()
//
// SUBWORKFLOW: Read in samplesheet, validate and stage input files
//
@@ -103,7 +108,9 @@ workflow {
)
ch_trimmed_reads = FASTP_TRIM_ADAPTERS.out.trimmed_reads
ch_trim_multiqc = FASTP_TRIM_ADAPTERS.out.json_report
ch_reports = ch_reports.mix(ch_trim_multiqc)
// FASTP_TRIM_ADAPTERS.out.json_report.collect().set { logs }
ch_reports = ch_reports.mix(FASTP_TRIM_ADAPTERS.out.json_report.collect{it[1]}.ifEmpty([]))
ch_reports_per_sample = ch_reports_per_sample.mix(ch_trim_multiqc)
//
// MODULE: Remove ribosomal RNA reads
//
@@ -117,7 +124,9 @@ workflow {
)
ch_trimmed_reads = SORTMERNA_RIBOSOMAL_RNA_REMOVAL.out.reads
ch_sortmerna_multiqc = SORTMERNA_RIBOSOMAL_RNA_REMOVAL.out.log
// ch_sortmerna_json_report = SORTMERNA_RIBOSOMAL_RNA_REMOVAL.out.json_report
ch_reports = ch_reports.mix(SORTMERNA_RIBOSOMAL_RNA_REMOVAL.out.log.collect{it[1]}.ifEmpty([]))
ch_reports_per_sample = ch_reports_per_sample.mix(ch_sortmerna_multiqc)
}
//
// MODULE: Quantify transcriptome abundance using Kallisto
@@ -129,8 +138,12 @@ workflow {
params.transcript_index
)
ch_kallisto_counts = KALLISTO_QUANT.out.abundance_tsv
ch_kallisto_multiqc = KALLISTO_QUANT.out.log
ch_kallisto_log = KALLISTO_QUANT.out.log
ch_kallisto_run_info = KALLISTO_QUANT.out.run_info
ch_kallisto_multiqc = ch_kallisto_counts.mix(ch_kallisto_run_info)
// QC reports collection
ch_reports = ch_reports.mix(KALLISTO_QUANT.out.log.collect{it[1]}.ifEmpty([]))
ch_reports_per_sample = ch_reports_per_sample.mix(ch_kallisto_multiqc)//.groupTuple(by: 0)
//
// MODULE: Merge all transcriptome quantification into a single file
//
@@ -160,6 +173,8 @@ workflow {
ch_star_flagstat = ALIGN_READS.out.flagstat
ch_star_idxstats = ALIGN_READS.out.idxstats
ch_star_multiqc = ALIGN_READS.out.log_final
// QC reports collection
ch_reports_per_sample = ch_reports_per_sample.mix(ch_star_multiqc)
ch_reports = ch_reports.mix(ALIGN_READS.out.log_final.collect{it[1]}.ifEmpty([]))
ch_star_bam_bai = ch_star_bam.join(ch_star_bai, by: [0])
//
@@ -182,131 +197,149 @@ workflow {
ch_samtools_flagstat = BAM_MARKDUPLICATES_PICARD.out.flagstat
ch_samtools_idxstats = BAM_MARKDUPLICATES_PICARD.out.idxstats
ch_markduplicates_multiqc = BAM_MARKDUPLICATES_PICARD.out.metrics
// QC reports collection
ch_reports_per_sample = ch_reports_per_sample.mix(ch_markduplicates_multiqc)
ch_reports = ch_reports.mix(BAM_MARKDUPLICATES_PICARD.out.stats.collect{it[1]}.ifEmpty([]))
ch_reports = ch_reports.mix(BAM_MARKDUPLICATES_PICARD.out.metrics.collect{it[1]}.ifEmpty([]))
ch_genome_bam_bai = ch_genome_bam.join(ch_genome_bai, by: [0])
//
// MODULE: SplitNCigarReads and reassign mapping qualities
//
ch_split_bam = Channel.empty()
ch_split_bai = Channel.empty()
GATK4_SPLITNCIGARREADS (
ch_genome_bam_bai,
params.genome,
params.genome_idx,
params.genome_dict
)
ch_split_bam = GATK4_SPLITNCIGARREADS.out.bam
ch_split_bai = GATK4_SPLITNCIGARREADS.out.bai
//
// MODULE: Base Recalibration table generation
//
ch_recal_table = Channel.empty()
GATK4_BASE_RECALIBRATOR (
ch_split_bam,
ch_split_bai,
params.genome,
params.genome_idx,
params.genome_dict,
params.dbsnp,
params.dbsnp_tbi
)
ch_recal_table = GATK4_BASE_RECALIBRATOR.out.table
ch_reports = ch_reports.mix(ch_recal_table.map{ meta, table -> table})
//
// MODULE: Apply BQSR using recalibration table, then index
//
ch_split_bam_bai = ch_split_bam.join(ch_split_bai, by: [0])
ch_bam_bai_bqsr = ch_split_bam_bai.join(ch_recal_table, by: [0])
ch_bam_variant_calling = Channel.empty()
ch_bai_variant_calling = Channel.empty()
GATK4_APPLY_BQSR (
ch_bam_bai_bqsr,
params.genome,
params.genome_idx,
params.genome_dict
)
SAMTOOLS_INDEX_BQSR (
GATK4_APPLY_BQSR.out.bam
)
ch_bam_variant_calling = GATK4_APPLY_BQSR.out.bam
ch_bai_variant_calling = SAMTOOLS_INDEX_BQSR.out.bai
//
// MODULE: Call SNPs and Indels using HaplotypeCaller
//
ch_bam_bai_variant_calling = ch_bam_variant_calling.join(ch_bai_variant_calling, by: [0])
ch_haplotype_vcf = Channel.empty()
ch_haplotype_tbi = Channel.empty()
GATK4_HAPLOTYPECALLER (
ch_bam_bai_variant_calling,
params.genome,
params.genome_idx,
params.genome_dict,
params.dbsnp,
params.dbsnp_tbi
)
ch_haplotype_vcf = GATK4_HAPLOTYPECALLER.out.vcf
ch_haplotype_tbi = GATK4_HAPLOTYPECALLER.out.tbi
ch_haplotype_vcf_tbi = ch_haplotype_vcf.join(ch_haplotype_tbi, by: [0])
//
// MODULE: Filter variants using VariantFiltration
//
ch_filtered_vcf = Channel.empty()
GATK4_VARIANTFILTRATION (
ch_haplotype_vcf_tbi,
params.genome,
params.genome_idx,
params.genome_dict
)
ch_filtered_vcf = GATK4_VARIANTFILTRATION.out.vcf
if (params.format_contigs && params.contig_format_map) {
if (params.call_snps) {
ch_split_bam = Channel.empty()
ch_split_bai = Channel.empty()
GATK4_SPLITNCIGARREADS (
ch_genome_bam_bai,
params.genome,
params.genome_idx,
params.genome_dict
)
ch_split_bam = GATK4_SPLITNCIGARREADS.out.bam
ch_split_bai = GATK4_SPLITNCIGARREADS.out.bai
//
// MODULE: Base Recalibration table generation
//
ch_recal_table = Channel.empty()
GATK4_BASE_RECALIBRATOR (
ch_split_bam,
ch_split_bai,
params.genome,
params.genome_idx,
params.genome_dict,
params.dbsnp,
params.dbsnp_tbi
)
ch_recal_table = GATK4_BASE_RECALIBRATOR.out.table
// QC reports collection
ch_reports_per_sample = ch_reports_per_sample.mix(ch_recal_table)
ch_reports = ch_reports.mix(ch_recal_table.map{ meta, table -> table})
//
// MODULE: Convert VCF contigs to desired naming format (e.g. ucsc)
// MODULE: Apply BQSR using recalibration table, then index
//
BCFTOOLS_CONTIG_CONVERSION (
ch_filtered_vcf,
params.contig_format_map
ch_split_bam_bai = ch_split_bam.join(ch_split_bai, by: [0])
ch_bam_bai_bqsr = ch_split_bam_bai.join(ch_recal_table, by: [0])
ch_bam_variant_calling = Channel.empty()
ch_bai_variant_calling = Channel.empty()
GATK4_APPLY_BQSR (
ch_bam_bai_bqsr,
params.genome,
params.genome_idx,
params.genome_dict
)
SAMTOOLS_INDEX_BQSR (
GATK4_APPLY_BQSR.out.bam
)
ch_bam_variant_calling = GATK4_APPLY_BQSR.out.bam
ch_bai_variant_calling = SAMTOOLS_INDEX_BQSR.out.bai
//
// MODULE: Call SNPs and Indels using HaplotypeCaller
//
ch_bam_bai_variant_calling = ch_bam_variant_calling.join(ch_bai_variant_calling, by: [0])
ch_haplotype_vcf = Channel.empty()
ch_haplotype_tbi = Channel.empty()
GATK4_HAPLOTYPECALLER (
ch_bam_bai_variant_calling,
params.genome,
params.genome_idx,
params.genome_dict,
params.dbsnp,
params.dbsnp_tbi
)
ch_haplotype_vcf = GATK4_HAPLOTYPECALLER.out.vcf
ch_haplotype_tbi = GATK4_HAPLOTYPECALLER.out.tbi
ch_haplotype_multiqc = GATK4_HAPLOTYPECALLER.out.summary_metrics
ch_reports_per_sample = ch_reports_per_sample.mix(ch_haplotype_multiqc)
ch_haplotype_vcf_tbi = ch_haplotype_vcf.join(ch_haplotype_tbi, by: [0])
//
// MODULE: Filter variants using VariantFiltration
//
ch_filtered_vcf = Channel.empty()
GATK4_VARIANTFILTRATION (
ch_haplotype_vcf_tbi,
params.genome,
params.genome_idx,
params.genome_dict
)
ch_filtered_vcf = GATK4_VARIANTFILTRATION.out.vcf
// ch_filtered_multiqc = GATK4_VARIANTFILTRATION.out.stats
// ch_reports_per_sample = ch_reports_per_sample.mix(ch_filtered_multiqc)
if (params.format_contigs && params.contig_format_map) {
//
// MODULE: Convert VCF contigs to desired naming format (e.g. ucsc)
//
BCFTOOLS_CONTIG_CONVERSION (
ch_filtered_vcf,
params.contig_format_map
)
ch_filtered_vcf = BCFTOOLS_CONTIG_CONVERSION.out.formatted_vcf
}
//
// MODULE: Sort and index VCFs
//
ch_sorted_vcf = Channel.empty()
BCFTOOLS_SORT_VCF (
ch_filtered_vcf
)
ch_sorted_vcf = BCFTOOLS_SORT_VCF.out.sorted_vcf
//
// MODULE: Index VCFs
//
ch_vcf_index = Channel.empty()
BCFTOOLS_INDEX_VCF (
ch_sorted_vcf
)
// ch_sorted_vcf = BCFTOOLS_INDEX_VCF.out.sorted_vcf
ch_vcf_index = BCFTOOLS_INDEX_VCF.out.vcf_index
ch_vcf = ch_sorted_vcf.join(ch_vcf_index, by: [0])
// Collect all VCFs and index files from upstream process
meta = ch_vcf
.map { tuple -> tuple[0]}
.collect()
vcfs = ch_vcf
.map { tuple -> tuple[1]}
.collect()
tbis = ch_vcf
.map { tuple -> tuple[2]}
.collect()
//
// MODULE: Merge VCFs
//
BCFTOOLS_MERGE_VCF (
meta,
vcfs,
tbis
)
ch_filtered_vcf = BCFTOOLS_CONTIG_CONVERSION.out.formatted_vcf
}
//
// MODULE: Sort and index VCFs
//
ch_sorted_vcf = Channel.empty()
BCFTOOLS_SORT_VCF (
ch_filtered_vcf
)
ch_sorted_vcf = BCFTOOLS_SORT_VCF.out.sorted_vcf
//
// MODULE: Index VCFs
//
ch_vcf_index = Channel.empty()
BCFTOOLS_INDEX_VCF (
ch_sorted_vcf
)
// ch_sorted_vcf = BCFTOOLS_INDEX_VCF.out.sorted_vcf
ch_vcf_index = BCFTOOLS_INDEX_VCF.out.vcf_index
ch_vcf = ch_sorted_vcf.join(ch_vcf_index, by: [0])
// Collect all VCFs and index files from upstream process
meta = ch_vcf
.map { tuple -> tuple[0]}
.collect()
vcfs = ch_vcf
.map { tuple -> tuple[1]}
.collect()
tbis = ch_vcf
.map { tuple -> tuple[2]}
.collect()
//
// MODULE: Merge VCFs
// MODULE: Generate QC reports per sample ID using MULTIQC
//
BCFTOOLS_MERGE_VCF (
meta,
vcfs,
tbis
// group QC reports by their sample ID
ch_reports_per_sample_grpd = ch_reports_per_sample.groupTuple(by: 0)
MULTIQC_PER_SAMPLE(
ch_reports_per_sample_grpd
)
//
// MODULE: Generate QC reports using MULTIQC
// MODULE: Generate Total QC reports using MULTIQC
//
ch_multiqc_files = Channel
.empty()
3 changes: 2 additions & 1 deletion bulk_RNASeq/config/base.config
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@ profiles {
singularity.autoMounts = true
process.executor = 'local'
process.cache = 'lenient'
executor.cpus = 314
trace.enabled = true
trace.taskMemory = true
}
@@ -18,7 +19,7 @@ profiles {
singularity.enabled = true
singularity.autoMounts = true
process.executor = 'slurm'
executor.queueSize = 60
executor.queueSize = 24
process.cache = 'lenient'
trace.enabled = true
trace.taskMemory = true
44 changes: 22 additions & 22 deletions bulk_RNASeq/config/container.config
Original file line number Diff line number Diff line change
@@ -7,95 +7,95 @@ process {


withLabel: 'fastp_trim_adapters' {
cpus = 6
cpus = Math.min(6, params.max_cpus_per_job)
}

withLabel: 'sortmerna_ribosomal_rna_removal' {
cpus = 20
cpus = Math.min(13, params.max_cpus_per_job)
time = '72h'
}

withLabel: 'kallisto_quant' {
cpus = 4
cpus = Math.min(4, params.max_cpus_per_job)
}

withLabel: 'star_align' {
cpus = 8
cpus = Math.min(8, params.max_cpus_per_job)
time = '72h'
}

withLabel: 'samtools_sort' {
cpus = 2
cpus = Math.min(2, params.max_cpus_per_job)
}

withLabel: 'samtools_index' {
cpus = 2
cpus = Math.min(2, params.max_cpus_per_job)
}

withLabel: 'samtools_stats' {
cpus = 1
cpus = Math.min(1, params.max_cpus_per_job)
}

withLabel: 'samtools_idxstats' {
cpus = 2
cpus = Math.min(2, params.max_cpus_per_job)
}

withLabel: 'samtools_flagstat' {
cpus = 2
cpus = Math.min(2, params.max_cpus_per_job)
}

withLabel: 'samtools_extract_mapped_reads' {
cpus = 2
cpus = Math.min(2, params.max_cpus_per_job)
}

withLabel: 'samtools_bam_to_cram' {
cpus = 2
cpus = Math.min(2, params.max_cpus_per_job)
}

withLabel: 'picard_markduplicates' {
cpus = 2
cpus = Math.min(2, params.max_cpus_per_job)
}

withLabel: 'gatk4_splitncigarreads' {
cpus = 1
cpus = Math.min(1, params.max_cpus_per_job)
time = '72h'
}

withLabel: 'gatk4_recalibrator' {
cpus = 1
cpus = Math.min(1, params.max_cpus_per_job)
}

withLabel: 'gatk4_apply_bqsr' {
cpus = 1
cpus = Math.min(1, params.max_cpus_per_job)
time = '48h'
}

withLabel: 'gatk4_haplotypecaller' {
cpus = 1
cpus = Math.min(1, params.max_cpus_per_job)
time = '148h'
}

withLabel: 'gatk4_variantfiltration' {
cpus = 1
cpus = Math.min(1, params.max_cpus_per_job)
}

withLabel: 'bcftools_contig_conversion' {
cpus = 4
cpus = Math.min(4, params.max_cpus_per_job)
}

withLabel: 'bcftools_sort_vcf' {
cpus = 1
cpus = Math.min(1, params.max_cpus_per_job)
}

withLabel: 'bcftools_index_vcf' {
cpus = 1
cpus = Math.min(1, params.max_cpus_per_job)
}

withLabel: 'bcftools_merge_vcf' {
cpus = 8
cpus = Math.min(8, params.max_cpus_per_job)
}

withLabel: 'multiqc' {
cpus = 1
cpus = Math.min(1, params.max_cpus_per_job)
}
}
2 changes: 2 additions & 0 deletions bulk_RNASeq/config/parameters.config
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

params {
max_cpus_per_job = 24
salmon_quant_libtype= null
fragment_length_mean= 200
fragment_length_std = 20
@@ -11,6 +12,7 @@ params {
gatk_vf_qd_filter = 2.0
umitools_dedup_stats= false
filter_rrna = true
call_snps = true
format_contigs = true
adapter_sequence_1 = "CTGTCTCTTATACACATCT"
adapter_sequence_2 = "CTGTCTCTTATACACATCT"
4 changes: 1 addition & 3 deletions bulk_RNASeq/modules/bcftools_merge_vcf.nf
Original file line number Diff line number Diff line change
@@ -39,7 +39,5 @@ process BCFTOOLS_MERGE_VCF {
--output merged_snps.bcf \\
${vcfs.join(' ')}
"""
}


}
}
2 changes: 1 addition & 1 deletion bulk_RNASeq/modules/fastp_trim_adapters.nf
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@ process FASTP_TRIM_ADAPTERS {

output:
tuple val(meta), path("*.fastp.fastq.gz") , emit: trimmed_reads
path("*.fastp.json") , emit: json_report
tuple val(meta), path("*.fastp.json") , emit: json_report
path("*.fastp.html") , emit: html_report

when:
8 changes: 8 additions & 0 deletions bulk_RNASeq/modules/gatk4_haplotype_caller.nf
Original file line number Diff line number Diff line change
@@ -19,6 +19,8 @@ process GATK4_HAPLOTYPECALLER {
output:
tuple val(meta), path("*.vcf.gz"), emit: vcf
tuple val(meta), path("*.tbi") , optional:true, emit: tbi
tuple val(meta), path("*.variant_calling_detail_metrics"), emit: detail_metrics
tuple val(meta), path("*.variant_calling_summary_metrics"), emit: summary_metrics

when:
task.ext.when == null || task.ext.when
@@ -36,5 +38,11 @@ process GATK4_HAPLOTYPECALLER {
$dbsnp_command \\
--tmp-dir \$PWD \\
$args
gatk --java-options "-Xmx${task.memory.toGiga()}G" \\
CollectVariantCallingMetrics \\
--DBSNP $known_sites \\
--INPUT ${prefix}.vcf.gz \\
--OUTPUT ${prefix}
"""
}
8 changes: 4 additions & 4 deletions bulk_RNASeq/modules/kallisto_quant.nf
Original file line number Diff line number Diff line change
@@ -20,23 +20,23 @@ process KALLISTO_QUANT {
output:
tuple val(meta), path("${prefix}.kallisto.abundance.h5") , emit: abundance_h5
tuple val(meta), path("${prefix}.kallisto.abundance.tsv") , emit: abundance_tsv
tuple val(meta), path("${prefix}.kallisto.run_info.json"), emit: json_info, optional: true
tuple val(meta), path("${prefix}.kallisto.log"), emit: log, optional: true
tuple val(meta), path("${prefix}.kallisto.run_info.json"), emit: run_info
tuple val(meta), path("${prefix}.kallisto.log"), emit: log


script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
if (meta.single_end) {
"""
kallisto quant --index $transcript_index --output-dir ${prefix} -t ${task.cpus} --single -l ${params.fragment_length_mean} -s ${params.fragment_length_std} ${reads} > ${prefix}.kallisto.log
kallisto quant --index $transcript_index --output-dir ${prefix} -t ${task.cpus} --single -l ${params.fragment_length_mean} -s ${params.fragment_length_std} ${reads} &> ${prefix}.kallisto.log
mv ${prefix}/abundance.h5 ${prefix}.kallisto.abundance.h5
mv ${prefix}/abundance.tsv ${prefix}.kallisto.abundance.tsv
mv ${prefix}/run_info.json ${prefix}.kallisto.run_info.json
"""
} else {
"""
kallisto quant --index $transcript_index --output-dir ${prefix} -t ${task.cpus} ${reads[0]} ${reads[1]} > ${prefix}.kallisto.log
kallisto quant --index $transcript_index --output-dir ${prefix} -t ${task.cpus} ${reads[0]} ${reads[1]} &> ${prefix}.kallisto.log
mv ${prefix}/abundance.h5 ${prefix}.kallisto.abundance.h5
mv ${prefix}/abundance.tsv ${prefix}.kallisto.abundance.tsv
mv ${prefix}/run_info.json ${prefix}.kallisto.run_info.json
28 changes: 28 additions & 0 deletions bulk_RNASeq/modules/multiqc_per_sample.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
process MULTIQC_PER_SAMPLE {
tag "$meta.id"
publishDir "${params.results_directory}/multiqc_per_sample", mode: 'copy'
label 'multiqc_per_sample'
memory {
// File size in GB
fileSize = log_files.size() / (1024 * 1024 * 1024)
return 1.GB + (1.GB * fileSize)
}

input:
tuple val(meta), path(log_files)

output:
path "*multiqc_report.html", emit: report
path "*_data" , emit: data
path "*_plots" , optional:true, emit: plots

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
multiqc --filename ${prefix}_multiqc_report.html -f $args .
"""
}
4 changes: 3 additions & 1 deletion bulk_RNASeq/modules/sortmerna_rrna_removal.nf
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@ process SORTMERNA_RIBOSOMAL_RNA_REMOVAL {

output:
tuple val(meta), path("*.sortmerna.fastq.gz"), emit: reads
tuple val(meta), path("*.log") , emit: log
tuple val(meta), path("*.sortmerna.log") , emit: log

when:
task.ext.when == null || task.ext.when
@@ -40,6 +40,7 @@ process SORTMERNA_RIBOSOMAL_RNA_REMOVAL {
--aligned rRNA_reads \\
--fastx \\
--other non_rRNA_reads \\
--log \\
$args
mv non_rRNA_reads.f*q.gz ${prefix}.sortmerna.fastq.gz
@@ -58,6 +59,7 @@ process SORTMERNA_RIBOSOMAL_RNA_REMOVAL {
--other non_rRNA_reads \\
--paired_in \\
--out2 \\
--log \\
$args
mv non_rRNA_reads_fwd.f*q.gz ${prefix}_R1.sortmerna.fastq.gz