Skip to content

Commit

Permalink
exposed and documented basic C APIs (#69)
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Dec 5, 2024
1 parent be7b90c commit 7153f15
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 3 deletions.
11 changes: 9 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@ endif

all:$(PROG)

miniprot:$(OBJS) main.o
$(CC) $(CFLAGS) $^ -o $@ $(LIBS)
libminiprot.a:$(OBJS)
$(AR) -csru $@ $(OBJS)

miniprot:$(OBJS) main.o libminiprot.a
$(CC) $(CFLAGS) main.o -o $@ -L. -lminiprot $(LIBS)

miniprot-lite:example.o libminiprot.a
$(CC) $(CFLAGS) $< -o $@ -L. -lminiprot $(LIBS)

clean:
rm -fr *.o a.out $(PROG) *~ *.a *.dSYM
Expand All @@ -39,6 +45,7 @@ depend:
align.o: mppriv.h miniprot.h nasw.h kalloc.h kseq.h
bseq.o: kvec-km.h kalloc.h mppriv.h miniprot.h nasw.h kseq.h
chain.o: mppriv.h miniprot.h nasw.h kalloc.h kseq.h
example.o: miniprot.h nasw.h kalloc.h kseq.h
format.o: kseq.h mppriv.h miniprot.h nasw.h kalloc.h
hit.o: mppriv.h miniprot.h nasw.h kalloc.h kseq.h
index.o: mppriv.h miniprot.h nasw.h kalloc.h kseq.h kvec-km.h kthread.h
Expand Down
64 changes: 64 additions & 0 deletions example.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// To compile:
// gcc -g -O2 example.c libminiprot.a -lz

#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <zlib.h>
#include "miniprot.h"
#include "nasw.h" // needed for CIGAR
#include "kseq.h"
KSEQ_INIT(gzFile, gzread)

int main(int argc, char *argv[])
{
mp_idxopt_t iopt;
mp_mapopt_t mopt;
int n_threads = 3;

mp_start(); // this set the translation table to "1"
//ns_make_tables(1); // set the translation table if not "1"
mp_verbose = 2; // disable message output to stderr
mp_idxopt_init(&iopt);
mp_mapopt_init(&mopt);

if (argc < 3) {
fprintf(stderr, "Usage: miniprot-lite <target.fa> <query.fa>\n");
return 1;
}

// open query file for reading; you may use your favorite FASTA/Q parser
gzFile f = gzopen(argv[2], "r");
assert(f);
kseq_t *ks = kseq_init(f);

// open index reader
mp_idx_t *mi = mp_idx_load(argv[1], &iopt, n_threads);
if (mi != 0) {
mp_tbuf_t *tbuf = mp_tbuf_init(); // thread buffer; for multi-threading, allocate one tbuf for each thread
while (kseq_read(ks) >= 0) { // each kseq_read() call reads one query sequence
mp_reg1_t *reg;
int j, i, n_reg;
reg = mp_map(mi, ks->seq.l, ks->seq.s, &n_reg, tbuf, &mopt, 0); // get all hits for the query
for (j = 0; j < n_reg; ++j) { // traverse hits and print them out
mp_reg1_t *r = &reg[j];
assert(r->p);
const mp_ctg_t *ctg = &mi->nt->ctg[r->vid>>1];
int64_t rs = r->vid&1? ctg->len - r->ve : r->vs;
int64_t re = r->vid&1? ctg->len - r->vs : r->ve;
printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, (int)ks->seq.l, r->qs, r->qe, "+-"[r->vid&1]);
printf("%s\t%ld\t%ld\t%ld\t%d\t%d\t0\tcg:Z:", ctg->name, (long)ctg->len, (long)rs, (long)re, r->p->n_iden * 3, r->p->blen);
for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings!
printf("%d%c", r->p->cigar[i]>>4, NS_CIGAR_STR[r->p->cigar[i]&0xf]);
putchar('\n');
free(r->p);
}
free(reg);
}
mp_tbuf_destroy(tbuf);
mp_idx_destroy(mi);
}
kseq_destroy(ks); // close the query file
gzclose(f);
return 0;
}
118 changes: 117 additions & 1 deletion miniprot.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,21 +150,137 @@ extern char *ns_tab_nt_i2c, *ns_tab_aa_i2c;
extern uint8_t ns_tab_a2r[22], ns_tab_nt4[256], ns_tab_aa20[256], ns_tab_aa13[256];
extern uint8_t ns_tab_codon[64], ns_tab_codon13[64];

/**
* Set the translation table and builtin timer
*
* Run this before all other miniprot routines.
*/
void mp_start(void);

/**
* Initialize the default parameters for indexing
*
* @param io struct to initialize (out)
*/
void mp_idxopt_init(mp_idxopt_t *io);

/**
* Initialize the default parameters fr mapping
*
* @param mo struct to initialize (out)
*/
void mp_mapopt_init(mp_mapopt_t *mo);

/**
* Set frameshift and stop codon penalty
*
* This function changes the mp_mapopt_t::fs field and mp_mapopt_t::mat[]
*
* @param mo mapping options (in/out)
* @param fs frameshift and stop codon penalty (positive)
*/
void mp_mapopt_set_fs(mp_mapopt_t *mo, int32_t fs);

/**
* Set max intron length based on the genome size
*
* @param mo mapping options (in/out)
* @param gsize genome size
*/
void mp_mapopt_set_max_intron(mp_mapopt_t *mo, int64_t gsize);

/**
* Check the integrity of mapping parameters (not complete)
*
* @param mo mapping options
*
* @return 0 on success, or negative on failure
*/
int32_t mp_mapopt_check(const mp_mapopt_t *mo);

/**
* Load or build an index
*
* If _fn_ corresponds to a FASTA file, build the index using _io_ and
* _n_threads_. If _fn_ corresponds to a prebuilt index file, load the index
* and ignore _io_ and _n_threads_.
*
* @param fn FASTA or index file name
* @param io indexing options
* @param n_threads number of threads
*
* @return the index
*/
mp_idx_t *mp_idx_load(const char *fn, const mp_idxopt_t *io, int32_t n_threads);

/**
* Deallocate the index
*
* @param mi the index
*/
void mp_idx_destroy(mp_idx_t *mi);

/**
* Write an index to disk
*
* @param fn index file name
* @param mi the index
*
* @return 0 on success or negative on failure
*/
int mp_idx_dump(const char *fn, const mp_idx_t *mi);

/**
* Read an index file
*
* @param fn index file name
*
* @return the index, or NULL on failure
*/
mp_idx_t *mp_idx_restore(const char *fn);
void mp_idx_print_stat(const mp_idx_t *mi, int32_t max_occ);

/**
* Load a splice score file
*
* @param nt sequences in mp_idx_t::nt (in/out)
* @param fn splice score file name; can be optionally gzip'd
* @param max_sc cap splice scores at this parameter
*
* @return 0 on success, or negative on failure
*/
int32_t mp_ntseq_read_spsc(mp_ntdb_t *nt, const char *fn, int32_t max_sc);

/**
* Align a protein sequence against the index
*
* @param mi the index
* @param qlen protein length
* @param seq protein sequence
* @param n_reg number of alignments (out)
* @param b thread-local buffer; threads shouldn't share buffers (can be NULL)
* @param opt mapping parameters
* @param qname query name (used for breaking ties)
*
* @return list of alignments
*/
mp_reg1_t *mp_map(const mp_idx_t *mi, int qlen, const char *seq, int *n_reg, mp_tbuf_t *b, const mp_mapopt_t *opt, const char *qname);

/**
* Initialize a thread-local buffer
*
* @return the buffer
*/
mp_tbuf_t *mp_tbuf_init(void);

/**
* Deallocate a thread-local buffer
*
* @param b the buffer
*/
void mp_tbuf_destroy(mp_tbuf_t *b);

// ignore the following for now
void mp_idx_print_stat(const mp_idx_t *mi, int32_t max_occ);
int32_t mp_map_file(const mp_idx_t *idx, const char *fn, const mp_mapopt_t *opt, int n_threads);

#ifdef __cplusplus
Expand Down

0 comments on commit 7153f15

Please sign in to comment.