-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprimer_seq.py
88 lines (59 loc) · 2.11 KB
/
primer_seq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Start-up banner. The parenthesised single-argument form prints the same
# text under both Python 2 and Python 3.
print("hello world!")
#------------------------------------------------------------------
#Defining the function
#------------------------------------------------------------------
def best_pos(sequence, primer):
    """Find the ungapped placement(s) of ``primer`` on ``sequence`` with most matches.

    The primer is slid along the sequence one offset at a time. At each
    offset the score is the number of positions where both strings carry
    the same character. The comparison is case-insensitive.

    Args:
        sequence: The string to search in.
        primer: The string to place on ``sequence``.

    Returns:
        A ``(best_score, positions)`` tuple. ``best_score`` is the highest
        match count found (0 if nothing matches). ``positions`` lists, in
        ascending order, the 0-based start offsets (as strings) of every
        placement that reaches ``best_score``. When no offset has any match,
        every offset ties at 0 and is reported. ``positions`` is empty when
        the primer is longer than the sequence.
    """
    # str.upper() returns a new string, so the result has to be rebound.
    primer = primer.upper()
    sequence = sequence.upper()

    primer_len = len(primer)
    best_score = 0
    position = []
    # "+ 1" so the window ending exactly at the end of the sequence is scored.
    for i in range(len(sequence) - primer_len + 1):
        window = sequence[i:i + primer_len]
        local_score = sum(1 for a, b in zip(window, primer) if a == b)
        if local_score > best_score:
            # Strictly better: forget the earlier, lower-scoring offsets.
            best_score = local_score
            position = [str(i)]
        elif local_score == best_score:
            # Tie with the current best: report this offset as well.
            position.append(str(i))
    return (best_score, position)
#------------------------------------------------------------------
#Reading the FASTA input files
#------------------------------------------------------------------
import sys
import re
import pprint
def readin_fasta(file):
    """Read a FASTA file into a list of ``[name, sequence]`` pairs.

    A line starting with ``>`` opens a new record; the rest of that line is
    the record name. All following lines up to the next header are
    concatenated to form the sequence.

    Lines that appear before the first header are discarded, and a record
    whose name is empty is dropped.

    Args:
        file: Path of the FASTA file to read.

    Returns:
        A list of ``[name, sequence]`` lists in file order.

    Side effects:
        Pretty-prints the parsed records to standard output.
    """
    res = []
    seq_name = ""
    seq_parts = []
    # The context manager closes the handle even if parsing raises.
    with open(file) as infile:
        for line in infile:
            # Strip CR as well as LF so Windows-style files parse the same
            # way without relying on the 'U' open mode.
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                if seq_name:
                    res.append([seq_name, "".join(seq_parts)])
                # Drop only the leading marker; a later ">" is part of the name.
                seq_name = line[1:]
                seq_parts = []
            else:
                seq_parts.append(line)
    # Flush the record that was still open when the file ended.
    if seq_name:
        res.append([seq_name, "".join(seq_parts)])
    # Dump of the parsed records, kept so the script's output is unchanged.
    pprint.pprint(res)
    return res
# ----------- MAIN LOOP --------------
# Two FASTA paths are required: references first, primers second.
if len(sys.argv) < 3:
    sys.exit("usage: %s <references.fasta> <primers.fasta>" % sys.argv[0])

references = readin_fasta(sys.argv[1])
primers = readin_fasta(sys.argv[2])

# Score every primer against every reference and report one line per pair.
for ref_name, ref_seq in references:
    for primer_name, primer_seq in primers:
        score, pos = best_pos(ref_seq, primer_seq)
        # The parenthesised single-argument form prints the same text under
        # both Python 2 and Python 3.
        print("%s vs %s (score: %d) (pos :%s)"
              % (ref_name, primer_name, score, "-".join(pos)))