From b44387d5f91fd58e11fa94dc37e5ed7dae92fa81 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 7 Oct 2024 10:29:08 -0400 Subject: [PATCH] r261: allow negative splice score --- miniprot.h | 2 +- nasw-sse.c | 8 ++++---- nasw.h | 2 ++ ntseq.c | 5 +++-- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/miniprot.h b/miniprot.h index a8ddd19..4c4cd0e 100644 --- a/miniprot.h +++ b/miniprot.h @@ -3,7 +3,7 @@ #include -#define MP_VERSION "0.13-r260-dirty" +#define MP_VERSION "0.13-r261-dirty" #define MP_F_NO_SPLICE 0x1 #define MP_F_NO_ALIGN 0x2 diff --git a/nasw-sse.c b/nasw-sse.c index 76aa19e..68fc7fd 100644 --- a/nasw-sse.c +++ b/nasw-sse.c @@ -127,8 +127,8 @@ static uint8_t *ns_prep_seq(void *km, const char *ns, int32_t nl, const char *as if (ss) { // NB: ss[] uses the offset rule to specify a splice site; donor/acceptor[] uses a different rule. The ss[] way is better but too tricky to change now for (i = 1; i < nl; ++i) { if (ss[i]>>1 == 0) continue; - if (ss[i]&1) acceptor[i-1] -= (int8_t)(ss[i]>>1); - else donor[i-1] -= (int8_t)(ss[i]>>1); + if (ss[i]&1) acceptor[i-1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET; + else donor[i-1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET; } } ns_prep_nas(ns, nl, opt, nas); @@ -170,8 +170,8 @@ static uint8_t *ns_prep_seq_left(void *km, const char *ns, int32_t nl, const cha if (ss) { for (i = 0; i < nl; ++i) { if (ss[i]>>1 == 0) continue; - if (ss[i]&1) donor[nl - i - 1] -= (int8_t)(ss[i]>>1); - else acceptor[nl - i - 1] -= (int8_t)(ss[i]>>1); + if (ss[i]&1) donor[nl - i - 1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET; + else acceptor[nl - i - 1] -= (int8_t)(ss[i]>>1) - (int8_t)NS_SPSC_OFFSET; } } ns_prep_nas(ns, nl, opt, nas); diff --git a/nasw.h b/nasw.h index 84492e4..510c468 100644 --- a/nasw.h +++ b/nasw.h @@ -51,6 +51,8 @@ #define NS_S_GENERIC 1 #define NS_S_MAMMAL 2 +#define NS_SPSC_OFFSET 64 + extern char *ns_tab_nt_i2c, *ns_tab_aa_i2c; extern uint8_t ns_tab_a2r[22], ns_tab_nt4[256], ns_tab_aa20[256], ns_tab_aa13[256]; extern uint8_t ns_tab_codon[64], ns_tab_codon13[64]; diff --git a/ntseq.c b/ntseq.c index a7a9f59..0709a20 100644 --- a/ntseq.c +++ b/ntseq.c @@ -242,6 +242,7 @@ int32_t mp_ntseq_read_spsc(mp_ntdb_t *nt, const char *fn, int32_t max_sc) fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "rb") : gzdopen(0, "rb"); if (fp == 0) return -1; + if (max_sc > 63) max_sc = 63; mp_ntseq_index_name(nt); nt->spsc = Kcalloc(0, mp_spsc_t, nt->n_ctg * 2); ks = ks_init(fp); @@ -271,15 +272,15 @@ int32_t mp_ntseq_read_spsc(mp_ntdb_t *nt, const char *fn, int32_t max_sc) } } if (i < 4) continue; // not enough fields - if (score <= 0) continue; if (score > max_sc) score = max_sc; + if (score < -max_sc) score = -max_sc; cid = mp_ntseq_name2id(nt, name); if (cid < 0 || type < 0 || strand == 0 || pos < 0) continue; // FIXME: give a warning! s = &nt->spsc[cid << 1 | (strand > 0? 0 : 1)]; Kgrow(0, uint64_t, s->a, s->n, s->m); if (strand < 0) pos = nt->ctg[cid].len - pos; if (pos > 0 && pos < nt->ctg[cid].len) { // ignore scores at the ends - s->a[s->n++] = (uint64_t)pos<<8 | score<<1 | type; + s->a[s->n++] = (uint64_t)pos << 8 | (score + NS_SPSC_OFFSET) << 1 | type; ++n_read; } }