-
Notifications
You must be signed in to change notification settings - Fork 0
/
rpm_prep
37 lines (28 loc) · 1004 Bytes
/
rpm_prep
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/bash
# rpm_prep
# RPINerd, 02/26/21
#
# Usage: rpm_prep [report_name]
#
# Run in a directory holding R1 and R2 *.fastq.gz files. Each archive is
# decompressed to <n>.fastq (n = 1, 2, ... in glob order), subsampled with
# seqtk into <n>.fastq.sub.fastq, and the first two subsamples are passed to
# ~/bin/read_pair_merger.py together with <report_name>.csv as the third
# argument (report_name defaults to "report").

# Abort on the first failing command, unset variable or broken pipeline so the
# merger never runs on partial or empty intermediate files.
set -euo pipefail

# SeqTK software for subsampling fastq files
export PATH="$PATH:/home/choward/bin/source/SeqTK/"

# Every file is sampled with the same seed and read count.
readonly SEED=420
readonly READS=2500

# Assigned separately from 'readonly' so tilde expansion is unambiguous.
MERGER=~/bin/read_pair_merger.py
readonly MERGER

# Print a message to stderr and terminate with a non-zero status.
die() {
  printf 'rpm_prep: %s\n' "$*" >&2
  exit 1
}

# Extract, subsample and merge the fastq.gz files of the current directory.
# Arguments: $1 - optional report name (without the .csv extension)
main() {
  # Set up report file name
  local outfile
  if [[ -z "${1:-}" ]]; then
    outfile="report"
    echo "No output file provided. Defaulting to 'report.csv'!"
  else
    outfile=$1
  fi

  # Fail early when a required tool is missing instead of half-way through.
  command -v seqtk > /dev/null || die "seqtk not found on PATH"
  command -v python3 > /dev/null || die "python3 not found on PATH"
  [[ -f "$MERGER" ]] || die "merger script not found: $MERGER"

  # Collect the inputs with a glob rather than parsing 'ls', so names with
  # whitespace survive; nullglob turns "no match" into an empty array.
  shopt -s nullglob
  local -a files=(./*.fastq.gz)
  shopt -u nullglob

  # The merge step below is hard-wired to subsamples 1 and 2.
  if ((${#files[@]} < 2)); then
    die "expected two *.fastq.gz files in $PWD, found ${#files[@]}"
  fi
  if ((${#files[@]} > 2)); then
    printf 'rpm_prep: warning: %d fastq.gz files found; only the first two are merged\n' \
      "${#files[@]}" >&2
  fi

  # TODO: be able to distinguish separate samples within the same folder
  local index=1
  local file filename
  for file in "${files[@]}"; do
    filename="${index}.fastq"

    # Extract file; -c writes to stdout and leaves the archive in place, so
    # no -k is needed (and older gzip releases do not accept it).
    gzip -cd -- "$file" > "$filename"

    # Subsample
    seqtk sample -s"$SEED" "$filename" "$READS" > "${filename}.sub.fastq"

    index=$((index + 1))
  done

  python3 "$MERGER" 1.fastq.sub.fastq 2.fastq.sub.fastq "${outfile}.csv"
}

main "$@"