diff --git a/main.c b/main.c index 9aa157c..eb1d834 100644 --- a/main.c +++ b/main.c @@ -16,6 +16,7 @@ static ko_longopt_t long_options[] = { { "no-pre-chain", ko_no_argument, 310 }, { "aln", ko_no_argument, 311 }, { "max-intron-out", ko_required_argument, 312 }, + { "outc", ko_required_argument, 313 }, { "version", ko_no_argument, 401 }, { "no-kalloc", ko_no_argument, 501 }, { "dbg-qname", ko_no_argument, 502 }, @@ -83,6 +84,7 @@ static void print_usage(FILE *fp, const mp_idxopt_t *io, const mp_mapopt_t *mo, fprintf(fp, " -u print unmapped query proteins in PAF\n"); fprintf(fp, " --outn=NUM output up to min{NUM,-N} alignments per query [%d]\n", mo->out_n); fprintf(fp, " --outs=FLOAT output if score at least FLOAT*bestScore [%g]\n", mo->out_sim); + fprintf(fp, " --outc=FLOAT output if at least FLOAT fraction of query is aligned [%g]\n", mo->out_cov); fprintf(fp, " -K NUM query batch size [2M]\n"); } @@ -140,6 +142,7 @@ int main(int argc, char *argv[]) else if (c == 310) mo.flag |= MP_F_NO_PRE_CHAIN; // --no-pre-chain else if (c == 311) mo.flag |= MP_F_SHOW_RESIDUE; // --aln else if (c == 312) mo.max_intron_flank = (mp_parse_num(o.arg) + 1) / 2; // --max-intron-out + else if (c == 313) mo.out_cov = atof(o.arg); // --outc else if (c == 501) mp_dbg_flag |= MP_DBG_NO_KALLOC; // --no-kalloc else if (c == 502) mp_dbg_flag |= MP_DBG_QNAME; // --dbg-qname else if (c == 503) mp_dbg_flag |= MP_DBG_NO_REFINE; // --dbg-no-refine diff --git a/map.c b/map.c index c87924f..f53dadf 100644 --- a/map.c +++ b/map.c @@ -301,9 +301,11 @@ static void *worker_pipeline(void *shared, int step, void *in) best_sc = s->reg[i][0].p? s->reg[i][0].p->dp_max : s->reg[i][0].chn_sc; } for (j = 0; j < s->n_reg[i] && j < p->opt->out_n; ++j) { - int32_t sc = s->reg[i][j].p? s->reg[i][j].p->dp_max : s->reg[i][j].chn_sc; + const mp_reg1_t *r = &s->reg[i][j]; + int32_t sc = r->p? r->p->dp_max : r->chn_sc; if (sc <= 0 || sc < (double)best_sc * p->opt->out_sim) continue; - mp_write_output(&p->str, 0, p->mi, &s->seq[i], &s->reg[i][j], p->opt, ++p->id, j + 1); + if (r->qe - r->qs < (double)s->seq[i].l_seq * p->opt->out_cov) continue; + mp_write_output(&p->str, 0, p->mi, &s->seq[i], r, p->opt, ++p->id, j + 1); fwrite(p->str.s, 1, p->str.l, stdout); } for (j = 0; j < s->n_reg[i]; ++j) { diff --git a/miniprot.1 b/miniprot.1 index 17a8dcc..871abab 100644 --- a/miniprot.1 +++ b/miniprot.1 @@ -191,6 +191,11 @@ Output an alignment only if its score is at least .IR FLOAT *bestScore, where bestScore is the best alignment score of the protein [0.99] .TP +.BI --outc \ FLOAT +Output an alignment only if +.I FLOAT +fraction of the query protein is aligned [0.1] +.TP .BI -K \ NUM Query batch size [2M] .SH OUTPUT FORMAT diff --git a/miniprot.h b/miniprot.h index 914d869..e70d2d7 100644 --- a/miniprot.h +++ b/miniprot.h @@ -3,7 +3,7 @@ #include -#define MP_VERSION "0.7-r218-dirty" +#define MP_VERSION "0.7-r219-dirty" #define MP_F_NO_SPLICE 0x1 #define MP_F_NO_ALIGN 0x2 @@ -55,7 +55,7 @@ typedef struct { float mask_level; int32_t mask_len; float pri_ratio; - float out_sim; + float out_sim, out_cov; int32_t best_n, out_n; int32_t kmer2; int32_t go, ge, io, fs; // gap open, extension, intron open, and frame-shift/stop-codon diff --git a/options.c b/options.c index e481bd0..2016214 100644 --- a/options.c +++ b/options.c @@ -61,6 +61,7 @@ void mp_mapopt_init(mp_mapopt_t *mo) mo->best_n = 30; mo->out_n = 1000; mo->out_sim = 0.99f; + mo->out_cov = 0.1f; #if MP_BITS_PER_AA == 4 mo->kmer2 = 5; #else