forked from macmanes-lab/tuco_manuscript
-
Notifications
You must be signed in to change notification settings - Fork 0
/
5_50M read subsample, Trinity and BinPacker assembly, and analysis
50 lines (34 loc) · 4.23 KB
/
5_50M read subsample, Trinity and BinPacker assembly, and analysis
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
________________________________________________________________________________________________________________________________
#Concatenate all readfiles and subsample 50M reads from left and right files
# The per-tissue corrected reads are merged into one left (.1) and one right (.2)
# file. Both mates are built from the same tissue list, in the same order, so the
# two compiled files stay in step with each other (needed for paired reads).
corrected_dir=/data/andrew/reads_corrected
subsample_dir=/data/andrew/subsample/reads
tissues=(hippocampus hypothalamus kidney liver ovary skin spleen testes)

for mate in 1 2; do
  tissue_reads=()
  for tissue in "${tissues[@]}"; do
    tissue_reads+=("${corrected_dir}/tuco_${tissue}.${mate}.cor.fq")
  done
  # cat takes space-separated operands (a comma-joined list is read as a single
  # file name), and its output has to be redirected into the compiled file that
  # seqtk reads below.
  cat -- "${tissue_reads[@]}" > "${corrected_dir}/tuco.compiled.${mate}.cor.fq"
done

# Write the subsample where the assembly commands read it from, rather than
# into whatever the current directory happens to be.
mkdir -p -- "${subsample_dir}"

# The same seed (-s) is passed for both mates so that the same records are drawn
# from each file and pairing is preserved. The two runs are independent commands:
# piping one into the other connects nothing, because stdout of the first is
# already redirected to a file.
seqtk sample -s1025340 "${corrected_dir}/tuco.compiled.1.cor.fq" 50000000 \
  > "${subsample_dir}/tuco.compiled.50M-1.1.fq"
seqtk sample -s1025340 "${corrected_dir}/tuco.compiled.2.cor.fq" 50000000 \
  > "${subsample_dir}/tuco.compiled.50M-1.2.fq"
________________________________________________________________________________________________________________________________
# Trinity assembly of the 50M-read subsample, followed by BUSCO and TransRate
# assessment of that assembly and a second BUSCO run on the "good" contig file
# found under the TransRate output directory.
# Relative names (Trinity --output, BUSCO -o, TransRate -o) resolve against the
# current directory; presumably this is run from /data/andrew/subsample, since
# the inputs below are read from there by absolute path - confirm.
reads_left=/data/andrew/subsample/reads/tuco.compiled.50M-1.1.fq
reads_right=/data/andrew/subsample/reads/tuco.compiled.50M-1.2.fq
busco_py=/opt/BUSCO_v1.1b1/BUSCO_v1.1b1.py
busco_lineage=/opt/BUSCO_v1.1b1/vertebrata
transrate_bin=/opt/transrate-1.0.1-linux-x86_64/transrate
trinity_fasta=/data/andrew/subsample/tuco_compiled.50M-1.trinity.Trinity.fasta
transrate_good_fasta=/data/andrew/subsample/TRANSRATE_tuco_compiled.50M-1/tuco_compiled.50M-1.trinity.Trinity/good.tuco_compiled.50M-1.trinity.Trinity.fasta

# Assemble; read trimming is delegated to Trinity via --trimmomatic with the
# quality-trimming parameters given here.
Trinity \
  --seqType fq \
  --max_memory 60G \
  --CPU 20 \
  --trimmomatic \
  --quality_trimming_params "ILLUMINACLIP:/data/andrew/adapters/TruSeq3-PE.fa:2:40:15 LEADING:2 TRAILING:2 MINLEN:25" \
  --left "${reads_left}" \
  --right "${reads_right}" \
  --output tuco_compiled.50M-1.trinity \
  --full_cleanup

# BUSCO (transcriptome mode, vertebrata lineage) on the full assembly.
python3 "${busco_py}" \
  -in "${trinity_fasta}" \
  -m trans \
  --cpu 16 \
  -l "${busco_lineage}" \
  -o BUSCO_tuco_compiled.50M-1

# TransRate on the full assembly, using the same subsampled reads.
"${transrate_bin}" \
  -a "${trinity_fasta}" \
  --left "${reads_left}" \
  --right "${reads_right}" \
  -t 20 \
  -o TRANSRATE_tuco_compiled.50M-1

# BUSCO again, this time on the good.* contig file from the TransRate output.
python3 "${busco_py}" \
  -in "${transrate_good_fasta}" \
  -m trans \
  --cpu 16 \
  -l "${busco_lineage}" \
  -o BUSCO_good.tuco_compiled.50M-1
________________________________________________________________________________________________________________________________
#Trim read files with Trimmomatic, assemble with BinPacker, and assess with BUSCO and TransRate
# NOTE(review): the trimming steps here include SLIDINGWINDOW:4:2, which the
# --quality_trimming_params given to Trinity in the previous section do not, so
# the two assemblies are not trimmed identically - confirm which was intended.
reads_dir=/data/andrew/subsample/reads
trimmed_left="${reads_dir}/tuco.compiled.50M.trimmed.1.fq"
trimmed_right="${reads_dir}/tuco.compiled.50M.trimmed.2.fq"
busco_py=/opt/BUSCO_v1.1b1/BUSCO_v1.1b1.py
busco_lineage=/opt/BUSCO_v1.1b1/vertebrata
transrate_bin=/opt/transrate-1.0.1-linux-x86_64/transrate
binpacker_fasta=/data/andrew/subsample/binpacker_tuco_compiled.50M/tuco.compiled.50M.binpacker.fa
transrate_dir=/data/andrew/subsample/TRANSRATE_tuco_compiled.50M.binpacker/tuco.compiled.50M.binpacker

# The four output files are named explicitly (paired/unpaired for each mate, in
# the order Trimmomatic PE expects) instead of using -baseout followed by mv.
# -baseout places the _1P/_2P tag before the extension of the base name, so with
# a dotted base name the files the mv commands looked for are likely never
# created. Writing straight into ${reads_dir} also puts the trimmed reads where
# BinPacker and TransRate read them from.
java -jar /opt/Trimmomatic-0.32/trimmomatic-0.32.jar PE -phred33 -threads 20 \
  "${reads_dir}/tuco.compiled.50M-1.1.fq" \
  "${reads_dir}/tuco.compiled.50M-1.2.fq" \
  "${trimmed_left}" "${reads_dir}/tuco.compiled.50M.trimmed.1U.fq" \
  "${trimmed_right}" "${reads_dir}/tuco.compiled.50M.trimmed.2U.fq" \
  ILLUMINACLIP:/data/andrew/adapters/TruSeq3-PE.fa:2:40:15 LEADING:2 TRAILING:2 SLIDINGWINDOW:4:2 MINLEN:25

# Assemble the trimmed pairs with BinPacker (options unchanged).
# NOTE(review): the assessment steps read tuco.compiled.50M.binpacker.fa from the
# BinPacker output directory; no command here creates a file of that name, so it
# is presumably BinPacker's output renamed by hand - confirm.
/opt/BinPacker/BinPacker -d -q -s fq -p pair \
  -l "${trimmed_left}" \
  -r "${trimmed_right}" \
  -m RF -k 25 -g 200 -o binpacker_tuco_compiled.50M

# BUSCO (transcriptome mode, vertebrata lineage) on the full BinPacker assembly.
python3 "${busco_py}" -in "${binpacker_fasta}" \
  -m trans --cpu 16 -l "${busco_lineage}" -o BUSCO_tuco_compiled.50M.binpacker

# TransRate on the full assembly, using the trimmed reads.
"${transrate_bin}" -o TRANSRATE_tuco_compiled.50M.binpacker \
  -a "${binpacker_fasta}" \
  --left "${trimmed_left}" \
  --right "${trimmed_right}" -t 20

# BUSCO on the good.* contig file from the TransRate output directory. This
# mirrors the Trinity section, whose "BUSCO_good" run reads the good.-prefixed
# file; the path used here previously lacked that prefix.
python3 "${busco_py}" -in "${transrate_dir}/good.tuco.compiled.50M.binpacker.fa" \
  -m trans --cpu 16 -l "${busco_lineage}" -o BUSCO_good.tuco_compiled.50M.binpacker