forked from macmanes-lab/tuco_manuscript
-
Notifications
You must be signed in to change notification settings - Fork 0
/
7_removal of unannotated transcripts and analysis
27 lines (21 loc) · 1.69 KB
/
7_removal of unannotated transcripts and analysis
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#filter.py script to pull out annotated transcripts from assembly
"""
%prog some.fasta wanted-list.txt
"""
from Bio import SeqIO
import sys
def load_wanted_ids(path):
    """Return the set of record IDs listed one per line in *path*.

    Surrounding whitespace is stripped and blank lines are ignored.  A set is
    returned (rather than a list) so that each membership test made while
    filtering is a constant-time lookup instead of a scan of every wanted ID.
    """
    with open(path) as handle:
        return {line.strip() for line in handle if line.strip()}


def select_wanted(records, wanted_ids):
    """Return a lazy iterator over the records whose ``id`` is in *wanted_ids*.

    Input order is preserved and *records* is consumed one item at a time, so
    the whole FASTA file never has to be held in memory.
    """
    return (record for record in records if record.id in wanted_ids)


def main(argv=None):
    """Write the FASTA records named in a wanted-list to standard output.

    argv -- argument vector laid out like ``sys.argv`` (the default):
            ``argv[1]`` is the FASTA file to filter and ``argv[2]`` is a text
            file with one wanted record ID per line.  Extra arguments are
            ignored, as before.

    Exits with a usage message when fewer than two file arguments are given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit("usage: %s some.fasta wanted-list.txt" % argv[0])

    wanted_ids = load_wanted_ids(argv[2])
    # Keep the FASTA handle open only while records are being streamed out.
    with open(argv[1]) as fasta_handle:
        records = SeqIO.parse(fasta_handle, "fasta")
        SeqIO.write(select_wanted(records, wanted_ids), sys.stdout, "fasta")


if __name__ == "__main__":
    main()
_______________________________________________________________________________________________________________
#Generating an annotation-only assembly, and assessing with BUSCO and TransRate
#Column 1 of each GFF3 feature line is the ID of the sequence the feature sits on,
#so the unique values of that column are the transcripts that received an annotation.
#Directive/comment lines (those starting with '#', e.g. "##gff-version 3") and blank
#lines are skipped so they do not end up in the ID list.
awk '!/^#/ && NF {print $1}' /data/andrew/subsample/good.tuco_trinity.binpacker_cons.fa.dammit/good.tuco_trinity.binpacker_cons.fa.dammit.gff3 | sort -u > annotated_IDs.txt
#Keep only the assembly records whose ID is listed in annotated_IDs.txt
python /data/andrew/filter.py /data/andrew/tuco_final_assembly/tuco_final_assembly.annotated.fasta annotated_IDs.txt > tuco_final_assembly_annotation_only.fasta
#BUSCO (transcriptome mode, vertebrata lineage set, 20 CPUs) on the annotation-only assembly
BUSCO_DIR=/opt/BUSCO_v1.1b1
python3 "$BUSCO_DIR/BUSCO_v1.1b1.py" \
    -in tuco_final_assembly_annotation_only.fasta \
    -o BUSCO_tuco_final_assembly_annotation_only \
    -l "$BUSCO_DIR/vertebrata" \
    -m trans \
    --cpu 20
#TransRate on the annotation-only assembly, using the trimmed 50M paired-end read files
#Results are written to the TRANSRATE_annotation_only directory (relative to the current directory)
READS_DIR=/data/andrew/subsample/reads
/opt/transrate-1.0.1-linux-x86_64/transrate \
    -a tuco_final_assembly_annotation_only.fasta \
    --left "$READS_DIR/tuco.compiled.50M.trimmed.1.fq" \
    --right "$READS_DIR/tuco.compiled.50M.trimmed.2.fq" \
    -o TRANSRATE_annotation_only \
    -t 20
#BUSCO again, this time on the "good." FASTA found inside the TransRate output directory
#NOTE(review): the absolute path presumes the TransRate run wrote its output under
#/data/andrew/tuco_final_assembly/assembly_trimmed - confirm before re-running
BUSCO_DIR=/opt/BUSCO_v1.1b1
TRANSRATE_OUT=/data/andrew/tuco_final_assembly/assembly_trimmed/TRANSRATE_annotation_only
python3 "$BUSCO_DIR/BUSCO_v1.1b1.py" \
    -in "$TRANSRATE_OUT/tuco_final_assembly_annotation_only/good.tuco_final_assembly_annotation_only.fasta" \
    -m trans --cpu 20 \
    -l "$BUSCO_DIR/vertebrata" \
    -o BUSCO_good.tuco_final_assembly_annotation_only