Skip to content

Commit

Permalink
r243: in GFF3, the last CDS includes the stop codon
Browse files Browse the repository at this point in the history
Resolves #55
  • Loading branch information
lh3 committed Mar 6, 2024
1 parent d74cfdb commit 61a352f
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
9 changes: 6 additions & 3 deletions format.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,11 @@ static void mp_write_gff(kstring_t *s, void *km, const mp_idx_t *mi, const mp_bs

for (j = 0; j < r->n_feat; ++j) {
f = &feat[j];
vs = r->vid&1? ctg->len - f->ve : f->vs;
ve = r->vid&1? ctg->len - f->vs : f->ve;
ve = f->ve;
if (has_stop && f->type == MP_FEAT_CDS && j + 1 < r->n_feat && feat[j+1].type == MP_FEAT_STOP) // in GFF3, the last CDS includes stop codon. GTF is different!
ve += 3;
vs = r->vid&1? ctg->len - ve : f->vs;
ve = r->vid&1? ctg->len - f->vs : ve;
mp_sprintf_lite(s, "%s\tminiprot\t%s\t%d\t%d\t%d\t%c\t%d\tParent=%s;Rank=%d", ctg->name, f->type == MP_FEAT_STOP? "stop_codon" : "CDS",
(int)vs + 1, (int)ve, f->score, "+-"[r->vid&1], f->phase, id_str, hit_idx);
if (f->type == MP_FEAT_CDS) {
Expand Down Expand Up @@ -399,7 +402,7 @@ static void mp_write_gtf(kstring_t *s, void *km, const mp_idx_t *mi, const mp_bs
for (j = 0; j < r->n_feat; ++j) {
int64_t vs2, ve2;
f = &feat[j];
if (f->type != MP_FEAT_CDS) continue;
if (f->type != MP_FEAT_CDS) continue; // GTF is simpler without stop_codon and additional attributes
vs2 = vs = r->vid&1? ctg->len - f->ve : f->vs;
ve2 = ve = r->vid&1? ctg->len - f->vs : f->ve;
if (f->ve == r->ve) { // last exon; then adjust for stop codon
Expand Down
5 changes: 4 additions & 1 deletion miniprot.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH miniprot 1 "24 June 2023" "miniprot-0.12 (r237)" "Bioinformatics tools"
.TH miniprot 1 "5 March 2024" "miniprot-0.12-dirty (r243)" "Bioinformatics tools"
.SH NAME
.PP
miniprot - protein-to-genome alignment with splicing and frameshifts
Expand Down Expand Up @@ -47,6 +47,9 @@ Sample k-mers at a rate
.BI -L \ INT
Minimum ORF length to index [30]
.TP
.BI -T \ INT
NCBI translation table (1 through 5) [1]
.TP
.BI -b \ INT
Number of bits per bin [8]. Miniprot splits the genome into non-overlapping bins of 2^8 bp in size.
.TP
Expand Down
2 changes: 1 addition & 1 deletion miniprot.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include <stdint.h>

#define MP_VERSION "0.12-r239-dirty"
#define MP_VERSION "0.12-r243-dirty"

#define MP_F_NO_SPLICE 0x1
#define MP_F_NO_ALIGN 0x2
Expand Down

0 comments on commit 61a352f

Please sign in to comment.