rpm_prep

#!/bin/bash

# rpm_prep
# RPINerd, 02/26/21
#
# When executed in a directory containing the R1 and R2 .fastq.gz files of a
# sample, this script decompresses each file, subsamples it with seqtk and,
# once both files have been sampled, joins them into a report using the
# read_pair_merger.py script I wrote.

# Append the SeqTK install directory to the search path so that the
# `seqtk` command used for subsampling fastq files can be found
PATH="${PATH}:/home/choward/bin/source/SeqTK/"
export PATH

# Base name of the report: the first argument when one is given, otherwise
# "report" (the ".csv" extension is appended later, when the report is written)
outfile=${1:-report}
[[ -n $1 ]] || echo "No output file provided. Defaulting to 'report.csv'!"

#######################################
# Decompress and subsample every *.fastq.gz file in the current directory.
#
# Inputs are taken in glob (sorted) order and numbered from 1: the Nth input
# is decompressed to N.fastq, then 2500 reads are sampled from it into
# N.fastq.sub.fastq. The same seed (420) is used for every file, which is
# seqtk's documented way of keeping paired reads matched across files.
#
# The file list comes from a glob held in an array rather than from parsing
# `ls`, so input names containing spaces or glob characters are handled.
#
# Globals:   none
# Arguments: none
# Outputs:   writes N.fastq and N.fastq.sub.fastq into the current directory;
#            diagnostics go to stderr
# Returns:   0 on success (or when there is nothing to process),
#            1 if gzip or seqtk fails on any file
#######################################
subsample_fastqs() {
    local -a inputs=(./*.fastq.gz)
    local file filename
    local index=1

    # An unmatched glob is left as the literal pattern (or as nothing under
    # nullglob), so check that the first entry is a real file.
    if [[ ! -e "${inputs[0]}" ]]; then
        echo "Warning: no .fastq.gz files found in $PWD" >&2
        return 0
    fi

    # TODO be able to distinguish separate samples within the same folder
    for file in "${inputs[@]}"; do
        filename="$index.fastq"

        # Extract file; -c writes to stdout and leaves the archive in place,
        # so -k is unnecessary (and is not accepted by older gzip releases)
        if ! gzip -cd -- "$file" > "$filename"; then
            echo "Error: failed to decompress '$file'" >&2
            return 1
        fi

        # Subsample
        if ! seqtk sample -s420 "$filename" 2500 > "$filename.sub.fastq"; then
            echo "Error: seqtk failed to subsample '$filename'" >&2
            return 1
        fi

        index=$((index + 1))
    done
}

# Stop here if a file could not be processed rather than merging bad data
subsample_fastqs || exit 1

# Merge the two subsampled read files into the CSV report named after $outfile
merger_args=(1.fastq.sub.fastq 2.fastq.sub.fastq "${outfile}.csv")
python3 ~/bin/read_pair_merger.py "${merger_args[@]}"