-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathpp_kmer.py
38 lines (30 loc) · 1019 Bytes
/
pp_kmer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Find a Profile-most probable k-mer in a string.
# Input: A string Text, an integer k, and a k × 4 matrix Profile.
# Output: A Profile-most probable k-mer in Text.
import sys
from operator import mul
# Read the whole input file up front; the context manager closes the handle
# even if parsing below raises.
with open(sys.argv[1].strip(), 'r') as input_file:
    lines = input_file.readlines()

dna = lines[0].strip()  # Text: the string scanned for k-mers
k = int(lines[1])  # k-mer length
# Profile rows start at lines[3]; lines[2] is skipped (presumably an
# "A C G T" column-header line -- confirm against the input format).
# Each row is materialised as a list so it can be indexed by column on both
# Python 2 and Python 3 (where map() returns a one-shot iterator), and blank
# trailing lines are ignored instead of crashing float().
profile = [
    [float(value) for value in row.split()]
    for row in lines[3:]
    if row.strip()
]
# Column of each nucleotide inside one profile row.
_NUCLEOTIDE_COLUMN = {'A': 0, 'C': 1, 'G': 2, 'T': 3}


def calculate_probablity(kmer, profile_matrix=None):
    """Return the probability of *kmer* under a position-wise profile.

    The result is the product, over every position i of *kmer*, of the
    profile entry at row i and the column belonging to the nucleotide found
    at that position (A=0, C=1, G=2, T=3).

    Args:
        kmer: String of nucleotides to score.
        profile_matrix: Optional sequence of rows, one per k-mer position,
            each indexable by the four nucleotide columns. When omitted, the
            module-level ``profile`` is used.

    Returns:
        The product of the selected profile entries; ``1`` for an empty
        k-mer; ``0.0`` if the k-mer contains a symbol other than A, C, G
        or T.

    Raises:
        IndexError: If *kmer* is longer than the number of profile rows.
    """
    if profile_matrix is None:
        profile_matrix = profile

    probability = 1
    for position, nucleotide in enumerate(kmer):
        column = _NUCLEOTIDE_COLUMN.get(nucleotide)
        if column is None:
            # A symbol the profile has no column for cannot be emitted by
            # it; skipping the position would silently inflate the product.
            return 0.0
        probability *= profile_matrix[position][column]
    return probability
# Scan every k-length window of dna and keep the first one with the highest
# profile probability.
best_pattern = ""
# Start below any real probability so that the first k-mer is still reported
# when every window scores 0.0.
best_probability = -1.0
for i in range(len(dna) - k + 1):
    kmer = dna[i:i + k]
    probability = calculate_probablity(kmer)  # scored once per window
    # Strict '>' keeps the earliest window when several tie for the maximum.
    if probability > best_probability:
        best_pattern = kmer
        best_probability = probability
# Parenthesised form prints the same text on Python 2 and Python 3.
print(best_pattern)