-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprimer_seq_counts.py
142 lines (96 loc) · 3.89 KB
/
primer_seq_counts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import pprint
import re
import os
pp = pprint.PrettyPrinter(indent=4)
#------------------------------------------------------------------
#Defining the function
#------------------------------------------------------------------
def best_pos(sequence, primer):
    """Find the ungapped alignment(s) of *primer* on *sequence* with most matches.

    Every window of ``len(primer)`` bases in *sequence* is compared against
    the primer base by base; the score of a window is its number of matching
    positions. Comparison is case-insensitive.

    Two summary lines (comparison count, best score with positions) are
    printed as a side effect.

    Args:
        sequence: The text to search in.
        primer: The text to place along the sequence.

    Returns:
        A tuple ``(best_score, position)``. ``position`` is a list of the
        start offsets (as strings) of every window that reaches
        ``best_score``. If no window matches a single base, ``best_score`` is
        0 and every window offset is listed, since they all tie. If the
        primer is longer than the sequence the result is ``(0, [])``.
    """
    # str.upper() returns a new string, so the result has to be rebound.
    sequence = sequence.upper()
    primer = primer.upper()
    primer_len = len(primer)

    nr_comp = 0
    best_score = 0
    position = []

    # "+ 1" so the last window, which ends exactly at the end of the
    # sequence, is scored as well.
    for start in range(len(sequence) - primer_len + 1):
        window = sequence[start:start + primer_len]
        local_score = sum(1 for a, b in zip(window, primer) if a == b)
        nr_comp += primer_len  # one comparison per primer base

        if local_score > best_score:
            # New best: earlier positions are no longer interesting.
            best_score = local_score
            position = [str(start)]
        elif local_score == best_score:
            # Tie with the current best: remember this position too.
            position.append(str(start))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Comparisons : " + str(nr_comp))
    # Separate the score from the position list so the digits do not fuse.
    print("score:" + str(best_score) + " positions:" + ",".join(position))
    return (best_score, position)
# ----------- Bounded variant: stops scoring a window once it cannot reach the best score --------------
def best_pos_bounds(sequence, primer):
    """Find the best ungapped primer placement(s), pruning hopeless windows.

    Each window of ``len(primer)`` bases in *sequence* is scored by its
    number of matching positions, but scoring of a window stops as soon as
    the matches still possible can no longer reach the best score seen so
    far. Comparison is case-insensitive.

    Two summary lines (comparison count, best score with positions) are
    printed as a side effect.

    Args:
        sequence: The text to search in.
        primer: The text to place along the sequence.

    Returns:
        A tuple ``(best_score, position)``. ``position`` is a list of the
        start offsets (as strings) of every window that reaches
        ``best_score``. If the primer is longer than the sequence the result
        is ``(0, [])``.
    """
    # str.upper() returns a new string, so the result has to be rebound.
    sequence = sequence.upper()
    primer = primer.upper()
    primer_len = len(primer)

    nr_comp = 0
    best_score = 0
    position = []

    # "+ 1" so the last window, which ends exactly at the end of the
    # sequence, is scored as well.
    for start in range(len(sequence) - primer_len + 1):
        local_score = 0
        for offset, base in enumerate(primer):
            remaining = primer_len - offset
            # Even if every remaining base matched, this window could not
            # tie the current best. The condition stays true for all later
            # offsets, so leaving the loop is equivalent to skipping them.
            if best_score > local_score + remaining:
                break
            nr_comp += 1
            if sequence[start + offset] == base:
                local_score += 1

        # A pruned window always has local_score < best_score, so it falls
        # through both branches below.
        if local_score > best_score:
            best_score = local_score
            position = [str(start)]
        elif local_score == best_score:
            position.append(str(start))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Comparisons : " + str(nr_comp))
    # Separate the score from the position list so the digits do not fuse.
    print("Score: " + str(best_score) + " positions: " + ", - ".join(position))
    return (best_score, position)
def best_pos_by_index_seq(sequence, primer, seed_length):
    """Find the best primer placement(s) among seed-matched positions only.

    An index of every ``seed_length``-long substring of *sequence* is built
    first. Only the positions where the first ``seed_length`` bases of the
    primer occur exactly are then scored in full (number of matching bases
    over the whole primer). Comparison is case-insensitive.

    The list of candidate positions and the comparison count are printed as
    a side effect.

    Args:
        sequence: The text to search in.
        primer: The text to place along the sequence.
        seed_length: Number of leading primer bases that must match exactly
            for a position to be considered.

    Returns:
        A tuple ``(best_score, position)``, in the same shape as
        ``best_pos``: ``position`` is a list of the start offsets (as
        strings) of every scored candidate that reaches ``best_score``.
        ``(0, [])`` when the primer's seed does not occur in the sequence or
        no candidate leaves room for the whole primer.
    """
    # str.upper() returns a new string, so the result has to be rebound.
    sequence = sequence.upper()
    primer = primer.upper()
    primer_len = len(primer)

    nr_comp = 0
    best_score = 0
    position = []

    # Build the index: seed text -> list of start offsets. "+ 1" so the seed
    # ending exactly at the end of the sequence is indexed too.
    seeds = {}
    for start in range(len(sequence) - seed_length + 1):
        seeds.setdefault(sequence[start:start + seed_length], []).append(start)

    # A seed that never occurs simply yields no candidates (no KeyError).
    candidates = seeds.get(primer[:seed_length], [])
    pprint.pprint(candidates, indent=4)

    last_start = len(sequence) - primer_len
    for pos in candidates:
        # A seed hit this close to the end leaves no room for the full
        # primer; scoring it would index past the end of the sequence.
        if pos > last_start:
            continue

        local_score = 0
        for offset, base in enumerate(primer):
            nr_comp += 1
            if sequence[pos + offset] == base:
                local_score += 1

        # Record the candidate's own offset, not the index-building loop
        # variable.
        if local_score > best_score:
            best_score = local_score
            position = [str(pos)]
        elif local_score == best_score:
            position.append(str(pos))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Comparisons : " + str(nr_comp))
    return (best_score, position)
# ----------- Demo runs --------------
# Each call prints its comparison count, so the strategies can be compared
# on the same inputs.
_DEMO_PRIMER = "CTTGGCATAA"
# The primer occurs verbatim at offset 24 in this sequence ...
_DEMO_SEQ_PRIMER_AT_24 = "AGACCAGATCTGAGCTTGGGAGCTCTTGGCATAACTAGGGAACCACAGTTTGAAACGT"
# ... and at offset 8 in this one.
_DEMO_SEQ_PRIMER_AT_8 = "AGACCAGACTTGGCATAATCTGAGCTTGGGAGCTCTAGGGAACCACAGTTTGAAACGT"

best_pos(_DEMO_SEQ_PRIMER_AT_24, _DEMO_PRIMER)
best_pos_bounds(_DEMO_SEQ_PRIMER_AT_24, _DEMO_PRIMER)
best_pos_bounds(_DEMO_SEQ_PRIMER_AT_8, _DEMO_PRIMER)
for _demo_seed_length in (3, 5):
    best_pos_by_index_seq(_DEMO_SEQ_PRIMER_AT_8, _DEMO_PRIMER, _demo_seed_length)