Skip to content

Commit

Permalink
r262: penalize sites not in --spsc
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Oct 7, 2024
1 parent b44387d commit be7b90c
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 11 deletions.
1 change: 1 addition & 0 deletions align.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ static inline void mp_map2ns_opt(const mp_mapopt_t *mo, ns_opt_t *no)
no->go = mo->go, no->ge = mo->ge, no->io = mo->io, no->fs = mo->fs, no->xdrop = mo->xdrop, no->sc = mo->mat;
no->ie_coef = mo->ie_coef;
no->end_bonus = mo->end_bonus;
no->sp_null_bonus = mo->sp_null_bonus;
ns_opt_set_sp(no, mo->sp_model);
for (i = 0; i < 6; ++i) no->sp[i] = (int32_t)(no->sp[i] * mo->sp_scale + .499f);
}
Expand Down
8 changes: 7 additions & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ static ko_longopt_t long_options[] = {
{ "trans", ko_no_argument, 315 },
{ "no-cs", ko_no_argument, 316 },
{ "spsc", ko_required_argument, 317 },
{ "spsc0", ko_required_argument, 318 },
{ "version", ko_no_argument, 401 },
{ "no-kalloc", ko_no_argument, 501 },
{ "dbg-qname", ko_no_argument, 502 },
Expand Down Expand Up @@ -81,6 +82,7 @@ static void print_usage(FILE *fp, const mp_idxopt_t *io, const mp_mapopt_t *mo,
fprintf(fp, " -B INT bonus score for alignment reaching query ends [%d]\n", mo->end_bonus);
fprintf(fp, " -j INT splice model: 2=mammal, 1=general, 0=none (see manual) [%d]\n", mo->sp_model);
fprintf(fp, " --spsc=FILE splice score []\n");
fprintf(fp, " --spsc0=INT splice penalty for sites not in --spsc [%d]\n", mo->sp_null_bonus);
fprintf(fp, " Input/output:\n");
fprintf(fp, " -t INT number of threads [%d]\n", n_threads);
fprintf(fp, " --gff output in the GFF3 format\n");
Expand Down Expand Up @@ -161,7 +163,11 @@ int main(int argc, char *argv[])
else if (c == 504) mp_dbg_flag |= MP_DBG_MORE_DP; // --dbg-aflt
else if (c == 505) mp_dbg_flag |= MP_DBG_ANCHOR; // --dbg-anchor
else if (c == 506) mp_dbg_flag |= MP_DBG_CHAIN; // --dbg-chain
else if (c == 's') {
else if (c == 318) { // --spsc0
int32_t s;
s = atoi(o.arg);
mo.sp_null_bonus = s < 0? s : -s;
} else if (c == 's') {
fprintf(stderr, "Option '-s' is deprecated.\n");
} else if (c == 401) {
printf("%s\n", MP_VERSION);
Expand Down
3 changes: 2 additions & 1 deletion miniprot.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include <stdint.h>

#define MP_VERSION "0.13-r261-dirty"
#define MP_VERSION "0.13-r262-dirty"

#define MP_F_NO_SPLICE 0x1
#define MP_F_NO_ALIGN 0x2
Expand Down Expand Up @@ -65,6 +65,7 @@ typedef struct {
int32_t io_end;
float ie_coef;
int32_t sp_model;
int32_t sp_null_bonus;
float sp_scale;
int32_t xdrop;
int32_t end_bonus;
Expand Down
22 changes: 16 additions & 6 deletions nasw-sse.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,14 @@ static uint8_t *ns_prep_seq(void *km, const char *ns, int32_t nl, const char *as
}
if (ss) { // NB: ss[] uses the offset rule to specify a splice site; donor/acceptor[] uses a different rule. The ss[] way is better but too tricky to change now
for (i = 1; i < nl; ++i) {
if (ss[i]>>1 == 0) continue;
if (ss[i]&1) acceptor[i-1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET;
else donor[i-1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET;
if (ss[i] == 0xff) { // score not set
donor[i-1] -= opt->sp_null_bonus;
acceptor[i-1] -= opt->sp_null_bonus;
} else if (ss[i]&1) { // acceptor
acceptor[i-1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET;
} else { // donor
donor[i-1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET;
}
}
}
ns_prep_nas(ns, nl, opt, nas);
Expand Down Expand Up @@ -169,9 +174,14 @@ static uint8_t *ns_prep_seq_left(void *km, const char *ns, int32_t nl, const cha
}
if (ss) {
for (i = 0; i < nl; ++i) {
if (ss[i]>>1 == 0) continue;
if (ss[i]&1) donor[nl - i - 1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET;
else acceptor[nl - i - 1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET;
if (ss[i] == 0xff) {
donor[nl - i - 1] -= opt->sp_null_bonus;
acceptor[nl - i - 1] -= opt->sp_null_bonus;
} else if (ss[i]&1) {
donor[nl - i - 1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET;
} else {
acceptor[nl - i - 1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET;
}
}
}
ns_prep_nas(ns, nl, opt, nas);
Expand Down
1 change: 1 addition & 0 deletions nasw-tab.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ void ns_opt_init(ns_opt_t *opt)
opt->xdrop = 100;
opt->end_bonus = 5;
ns_opt_set_sp(opt, NS_S_MAMMAL);
opt->sp_null_bonus = -7;
opt->asize = 22;
opt->ie_coef = .5f;
opt->sc = ns_mat_blosum62;
Expand Down
1 change: 1 addition & 0 deletions nasw.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ typedef struct {
int32_t xdrop, end_bonus; // xdrop for extension, and bonus for reaching end of proteins
int32_t asize; // alphabet size; always 22 in the current implementation
int32_t sp[6]; // 0:pos3, 1:GC-AC, 2:AT-AC, 3:other, 4:pos0, 5:poly-Y
int32_t sp_null_bonus; // it is negative
float ie_coef;
const int8_t *sc; // 22x22 scoring matrix
uint8_t *nt4, *aa20, *codon; // char-to-int convertion table and codon table
Expand Down
5 changes: 2 additions & 3 deletions ntseq.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ int64_t mp_ntseq_spsc_get(const mp_ntdb_t *db, int32_t cid, int64_t st0, int64_t
if (en0 < 0 || en0 > db->ctg[cid].len) en0 = db->ctg[cid].len;
if (!rev) st = st0, en = en0;
else st = db->ctg[cid].len - en0, en = db->ctg[cid].len - st0;
memset(sc, 0, en - st);
memset(sc, 0xff, en - st);
s = &db->spsc[cid << 1 | (!!rev)];
if (s->n > 0) {
int32_t j, l, r;
Expand All @@ -146,8 +146,7 @@ int64_t mp_ntseq_spsc_get(const mp_ntdb_t *db, int32_t cid, int64_t st0, int64_t
uint8_t score = s->a[j] & 0xff;
assert(x <= en - st);
if (x == en - st) continue;
//fprintf(stderr, "st=%lld, j=%d, pos=%lld\n", st0, j, s->a[j]>>8);
sc[x] = sc[x] > score? sc[x] : score;
if (sc[x] == 0xff || sc[x] < score) sc[x] = score;
}
}
return en - st;
Expand Down
1 change: 1 addition & 0 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ void mp_mapopt_init(mp_mapopt_t *mo)
mo->io_end = 19;
mo->ie_coef = .5f;
mo->sp_model = NS_S_GENERIC;
mo->sp_null_bonus = -7;
mo->sp_scale = 1.0f;
mo->end_bonus = 5;
mo->xdrop = 100;
Expand Down

0 comments on commit be7b90c

Please sign in to comment.