-
Notifications
You must be signed in to change notification settings - Fork 25
/
ksw2_gg.c
102 lines (95 loc) · 3.17 KB
/
ksw2_gg.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#include <stdio.h> // for debugging only
#include "ksw2.h"
typedef struct { int32_t h, e; } eh_t;
int ksw_gg(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t gapo, int8_t gape, int w, int *m_cigar_, int *n_cigar_, uint32_t **cigar_)
{
eh_t *eh;
int8_t *qp; // query profile
int32_t i, j, k, gapoe = gapo + gape, score, n_col, *off = 0;
uint8_t *z = 0; // backtrack matrix; in each cell: f<<4|e<<2|h; in principle, we can halve the memory, but backtrack will be more complex
// allocate memory
if (w < 0) w = tlen > qlen? tlen : qlen;
n_col = qlen < 2*w+1? qlen : 2*w+1; // maximum #columns of the backtrack matrix
qp = (int8_t*)kmalloc(km, qlen * m);
eh = (eh_t*)kcalloc(km, qlen + 1, 8);
if (m_cigar_ && n_cigar_ && cigar_) {
*n_cigar_ = 0;
z = (uint8_t*)kmalloc(km, (size_t)n_col * tlen);
off = (int32_t*)kcalloc(km, tlen, 4);
}
// generate the query profile
for (k = i = 0; k < m; ++k) {
const int8_t *p = &mat[k * m];
for (j = 0; j < qlen; ++j) qp[i++] = p[query[j]];
}
// fill the first row
eh[0].h = 0, eh[0].e = -gapoe - gapoe;
for (j = 1; j <= qlen && j <= w; ++j)
eh[j].h = -(gapoe + gape * (j - 1)), eh[j].e = -(gapoe + gapoe + gape * j);
for (; j <= qlen; ++j) eh[j].h = eh[j].e = KSW_NEG_INF; // everything is -inf outside the band
// DP loop
for (i = 0; i < tlen; ++i) { // target sequence is in the outer loop
int32_t f = KSW_NEG_INF, h1, st, en;
int8_t *q = &qp[target[i] * qlen];
st = i > w? i - w : 0;
en = i + w + 1 < qlen? i + w + 1 : qlen;
h1 = st > 0? KSW_NEG_INF : -(gapoe + gape * i);
f = st > 0? KSW_NEG_INF : -(gapoe + gapoe + gape * i);
if (m_cigar_ && n_cigar_ && cigar_) {
uint8_t *zi = &z[(long)i * n_col];
off[i] = st;
for (j = st; j < en; ++j) {
// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
// Cells are computed in the following order:
// H(i,j) = max{H(i-1,j-1) + S(i,j), E(i,j), F(i,j)}
// E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape
// F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape
eh_t *p = &eh[j];
int32_t h = p->h, e = p->e;
uint8_t d; // direction
p->h = h1;
h += q[j];
d = h >= e? 0 : 1;
h = h >= e? h : e;
d = h >= f? d : 2;
h = h >= f? h : f;
h1 = h;
h -= gapoe;
e -= gape;
d |= e > h? 0x08 : 0;
e = e > h? e : h;
p->e = e;
f -= gape;
d |= f > h? 0x10 : 0; // if we want to halve the memory, use one bit only, instead of two
f = f > h? f : h;
zi[j - st] = d; // z[i,j] keeps h for the current cell and e/f for the next cell
}
} else {
for (j = st; j < en; ++j) {
eh_t *p = &eh[j];
int32_t h = p->h, e = p->e;
p->h = h1;
h += q[j];
h = h >= e? h : e;
h = h >= f? h : f;
h1 = h;
h -= gapoe;
e -= gape;
e = e > h? e : h;
p->e = e;
f -= gape;
f = f > h? f : h;
}
}
eh[en].h = h1, eh[en].e = KSW_NEG_INF;
}
// backtrack
score = eh[qlen].h;
kfree(km, qp); kfree(km, eh);
if (m_cigar_ && n_cigar_ && cigar_) {
ksw_backtrack(km, 0, 0, 0, z, off, 0, n_col, tlen-1, qlen-1, m_cigar_, n_cigar_, cigar_);
kfree(km, z);
kfree(km, off);
}
return score;
}