-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathorf.py
52 lines (46 loc) · 2.04 KB
/
orf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Lookup table from a DNA codon (three uppercase letters drawn from
# A, C, G, T) to a one-letter amino-acid code.  The three codons that end
# translation (TAA, TAG, TGA) map to the sentinel string 'Stop'.
# Layout: each row fixes the 2nd and 3rd base; columns vary the 1st base
# in the order T, C, A, G.
CODON_PROTEIN_MAP = {
    'TTT': 'F', 'CTT': 'L', 'ATT': 'I', 'GTT': 'V',
    'TTC': 'F', 'CTC': 'L', 'ATC': 'I', 'GTC': 'V',
    'TTA': 'L', 'CTA': 'L', 'ATA': 'I', 'GTA': 'V',
    'TTG': 'L', 'CTG': 'L', 'ATG': 'M', 'GTG': 'V',
    'TCT': 'S', 'CCT': 'P', 'ACT': 'T', 'GCT': 'A',
    'TCC': 'S', 'CCC': 'P', 'ACC': 'T', 'GCC': 'A',
    'TCA': 'S', 'CCA': 'P', 'ACA': 'T', 'GCA': 'A',
    'TCG': 'S', 'CCG': 'P', 'ACG': 'T', 'GCG': 'A',
    'TAT': 'Y', 'CAT': 'H', 'AAT': 'N', 'GAT': 'D',
    'TAC': 'Y', 'CAC': 'H', 'AAC': 'N', 'GAC': 'D',
    'TAA': 'Stop', 'CAA': 'Q', 'AAA': 'K', 'GAA': 'E',
    'TAG': 'Stop', 'CAG': 'Q', 'AAG': 'K', 'GAG': 'E',
    'TGT': 'C', 'CGT': 'R', 'AGT': 'S', 'GGT': 'G',
    'TGC': 'C', 'CGC': 'R', 'AGC': 'S', 'GGC': 'G',
    'TGA': 'Stop', 'CGA': 'R', 'AGA': 'R', 'GGA': 'G',
    'TGG': 'W', 'CGG': 'R', 'AGG': 'R', 'GGG': 'G',
}
def reverse_complement(s):
    """Return the reverse complement of the DNA string *s*.

    The input is read back to front and every base is replaced by its
    pairing partner (A <-> T, C <-> G).

    Args:
        s: A string made up of the uppercase letters A, C, G and T.

    Returns:
        A new string of the same length as *s*.

    Raises:
        KeyError: If *s* contains any character other than A, C, G or T
            (lowercase letters included).
    """
    complements = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    # Feed a generator straight into join; the previous version bound the
    # intermediate result to a local named ``list``, shadowing the builtin.
    return ''.join(complements[base] for base in reversed(s))
def translate_codon(codon):
    """Translate a single codon using CODON_PROTEIN_MAP.

    Args:
        codon: The string to look up, normally a three-letter slice of a
            DNA sequence.

    Returns:
        The table entry for *codon* (a one-letter amino-acid code or the
        string 'Stop'), or None when *codon* is not a key of the table,
        for example a slice shorter than three characters taken at the
        end of a sequence.
    """
    # Every key of the table is exactly three characters long, so a separate
    # length check adds nothing; dict.get also makes the None result explicit
    # and avoids looking the key up twice.
    return CODON_PROTEIN_MAP.get(codon)
def transcribe_anywhere(s):
    """Collect the protein string of every open reading frame in *s*.

    Every position whose codon translates to 'M' starts a candidate frame.
    From there codons are read in steps of three until one translates to
    'Stop'; the amino acids gathered so far (excluding the stop) are then
    recorded.  A candidate that never reaches a stop codon is discarded.

    Args:
        s: DNA string to scan (a single strand; the caller is responsible
            for also passing the reverse complement if both are wanted).

    Returns:
        A list of protein strings ordered by start position.  Duplicates
        are kept; overlapping frames each produce their own entry.
    """
    proteins = []
    # The last two positions cannot hold a full codon, so they can never
    # start a frame.
    for start in range(len(s) - 2):
        if translate_codon(s[start:start + 3]) != 'M':
            continue
        residues = ['M']
        for pos in range(start + 3, len(s), 3):
            residue = translate_codon(s[pos:pos + 3])
            if residue is None:
                # Truncated or unrecognised codon before any stop codon:
                # this is not a complete frame.  Previously the None was
                # concatenated onto the protein string and raised TypeError.
                break
            if residue == 'Stop':
                proteins.append(''.join(residues))
                break
            residues.append(residue)
    return proteins
if __name__ == "__main__":
    # Sample DNA string to scan.
    s = "AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTTTGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG"
    # Scan the strand as given, then its reverse complement; collecting into
    # a set drops proteins that are found more than once.
    unique_proteins = set(transcribe_anywhere(s))
    unique_proteins.update(transcribe_anywhere(reverse_complement(s)))
    print(unique_proteins)