Skip to content

Commit

Permalink
r219: added --outc to filter out very short hits
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Mar 7, 2023
1 parent 87f11cb commit a939024
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 4 deletions.
3 changes: 3 additions & 0 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ static ko_longopt_t long_options[] = {
{ "no-pre-chain", ko_no_argument, 310 },
{ "aln", ko_no_argument, 311 },
{ "max-intron-out", ko_required_argument, 312 },
{ "outc", ko_required_argument, 313 },
{ "version", ko_no_argument, 401 },
{ "no-kalloc", ko_no_argument, 501 },
{ "dbg-qname", ko_no_argument, 502 },
Expand Down Expand Up @@ -83,6 +84,7 @@ static void print_usage(FILE *fp, const mp_idxopt_t *io, const mp_mapopt_t *mo,
fprintf(fp, " -u print unmapped query proteins in PAF\n");
fprintf(fp, " --outn=NUM output up to min{NUM,-N} alignments per query [%d]\n", mo->out_n);
fprintf(fp, " --outs=FLOAT output if score at least FLOAT*bestScore [%g]\n", mo->out_sim);
fprintf(fp, " --outc=FLOAT output if at least FLOAT fraction of query is aligned [%g]\n", mo->out_cov);
fprintf(fp, " -K NUM query batch size [2M]\n");
}

Expand Down Expand Up @@ -140,6 +142,7 @@ int main(int argc, char *argv[])
else if (c == 310) mo.flag |= MP_F_NO_PRE_CHAIN; // --no-pre-chain
else if (c == 311) mo.flag |= MP_F_SHOW_RESIDUE; // --aln
else if (c == 312) mo.max_intron_flank = (mp_parse_num(o.arg) + 1) / 2; // --max-intron-out
else if (c == 313) mo.out_cov = atof(o.arg); // --outc
else if (c == 501) mp_dbg_flag |= MP_DBG_NO_KALLOC; // --no-kalloc
else if (c == 502) mp_dbg_flag |= MP_DBG_QNAME; // --dbg-qname
else if (c == 503) mp_dbg_flag |= MP_DBG_NO_REFINE; // --dbg-no-refine
Expand Down
6 changes: 4 additions & 2 deletions map.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,11 @@ static void *worker_pipeline(void *shared, int step, void *in)
best_sc = s->reg[i][0].p? s->reg[i][0].p->dp_max : s->reg[i][0].chn_sc;
}
for (j = 0; j < s->n_reg[i] && j < p->opt->out_n; ++j) {
int32_t sc = s->reg[i][j].p? s->reg[i][j].p->dp_max : s->reg[i][j].chn_sc;
const mp_reg1_t *r = &s->reg[i][j];
int32_t sc = r->p? r->p->dp_max : r->chn_sc;
if (sc <= 0 || sc < (double)best_sc * p->opt->out_sim) continue;
mp_write_output(&p->str, 0, p->mi, &s->seq[i], &s->reg[i][j], p->opt, ++p->id, j + 1);
if (r->qe - r->qs < (double)s->seq[i].l_seq * p->opt->out_cov) continue;
mp_write_output(&p->str, 0, p->mi, &s->seq[i], r, p->opt, ++p->id, j + 1);
fwrite(p->str.s, 1, p->str.l, stdout);
}
for (j = 0; j < s->n_reg[i]; ++j) {
Expand Down
5 changes: 5 additions & 0 deletions miniprot.1
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,11 @@ Output an alignment only if its score is at least
.IR FLOAT *bestScore,
where bestScore is the best alignment score of the protein [0.99]
.TP
.BI --outc \ FLOAT
Output an alignment only if at least
.I FLOAT
fraction of the query protein is aligned [0.1]
.TP
.BI -K \ NUM
Query batch size [2M]
.SH OUTPUT FORMAT
Expand Down
4 changes: 2 additions & 2 deletions miniprot.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include <stdint.h>

#define MP_VERSION "0.7-r218-dirty"
#define MP_VERSION "0.7-r219-dirty"

#define MP_F_NO_SPLICE 0x1
#define MP_F_NO_ALIGN 0x2
Expand Down Expand Up @@ -55,7 +55,7 @@ typedef struct {
float mask_level;
int32_t mask_len;
float pri_ratio;
float out_sim;
float out_sim, out_cov;
int32_t best_n, out_n;
int32_t kmer2;
int32_t go, ge, io, fs; // gap open, extension, intron open, and frame-shift/stop-codon
Expand Down
1 change: 1 addition & 0 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ void mp_mapopt_init(mp_mapopt_t *mo)
mo->best_n = 30;
mo->out_n = 1000;
mo->out_sim = 0.99f;
mo->out_cov = 0.1f;
#if MP_BITS_PER_AA == 4
mo->kmer2 = 5;
#else
Expand Down

0 comments on commit a939024

Please sign in to comment.