Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert "properly read" constants to command line parameters #309

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Convert "properly read" constants to command line parameters
Greg Gibeling committed Jan 29, 2021
commit 8c15147be4c3b7e38e7cbd03449bfffd89f3329b
5 changes: 5 additions & 0 deletions bwamem.c
Original file line number Diff line number Diff line change
@@ -105,6 +105,11 @@ mem_opt_t *mem_opt_init()
o->min_chain_weight = 0;
o->max_chain_extend = 1<<30;
o->mapQ_coef_len = 50; o->mapQ_coef_fac = log(o->mapQ_coef_len);

o->outlier_bound_mem = 2.0;
o->mapping_bound_mem = 3.0;
o->max_stddev_mem = 4.0;

bwa_fill_scmat(o->a, o->b, o->mat);
return o;
}
4 changes: 4 additions & 0 deletions bwamem.h
Original file line number Diff line number Diff line change
@@ -81,6 +81,10 @@ typedef struct {
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
int max_XA_hits, max_XA_hits_alt; // if there are max_hits or fewer, output them all
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset

float outlier_bound_mem; // controls how to calculate low and high boundaries for computing mean and std.dev
float mapping_bound_mem; // controls how to calculate low and high boundaries for proper pairs. Low and high boundaries is calculated : F(Q1, Q2, Q3, mapping_bound_mem, max_stddev_mem) where Q2 is median insert size, Q1 and Q3 are lower and higher quartile (also see max_stddev_mem).
float max_stddev_mem; // controls how to calculate low and high boundaries for proper pairs. Low and high boundaries is calculated : F(Q1, Q2, Q3, mapping_bound_mem, max_stddev_mem) where Q2 is median insert size, Q1 and Q3 are lower and higher quartile (also see mapping_bound_mem).
} mem_opt_t;

typedef struct {
13 changes: 7 additions & 6 deletions bwamem_pair.c
Original file line number Diff line number Diff line change
@@ -42,6 +42,7 @@
#define MIN_RATIO 0.8
#define MIN_DIR_CNT 10
#define MIN_DIR_RATIO 0.05

#define OUTLIER_BOUND 2.0
#define MAPPING_BOUND 3.0
#define MAX_STDDEV 4.0
@@ -103,9 +104,9 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *
p25 = q->a[(int)(.25 * q->n + .499)];
p50 = q->a[(int)(.50 * q->n + .499)];
p75 = q->a[(int)(.75 * q->n + .499)];
r->low = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499);
r->low = (int)(p25 - opt->outlier_bound_mem * (p75 - p25) + .499);
if (r->low < 1) r->low = 1;
r->high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499);
r->high = (int)(p75 + opt->outlier_bound_mem * (p75 - p25) + .499);
fprintf(stderr, "[M::%s] (25, 50, 75) percentile: (%d, %d, %d)\n", __func__, p25, p50, p75);
fprintf(stderr, "[M::%s] low and high boundaries for computing mean and std.dev: (%d, %d)\n", __func__, r->low, r->high);
for (i = x = 0, r->avg = 0; i < q->n; ++i)
@@ -117,10 +118,10 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *
r->std += (q->a[i] - r->avg) * (q->a[i] - r->avg);
r->std = sqrt(r->std / x);
fprintf(stderr, "[M::%s] mean and std.dev: (%.2f, %.2f)\n", __func__, r->avg, r->std);
r->low = (int)(p25 - MAPPING_BOUND * (p75 - p25) + .499);
r->high = (int)(p75 + MAPPING_BOUND * (p75 - p25) + .499);
if (r->low > r->avg - MAX_STDDEV * r->std) r->low = (int)(r->avg - MAX_STDDEV * r->std + .499);
if (r->high < r->avg + MAX_STDDEV * r->std) r->high = (int)(r->avg + MAX_STDDEV * r->std + .499);
r->low = (int)(p25 - opt->mapping_bound_mem * (p75 - p25) + .499);
r->high = (int)(p75 + opt->mapping_bound_mem * (p75 - p25) + .499);
if (r->low > r->avg - opt->max_stddev_mem * r->std) r->low = (int)(r->avg - opt->max_stddev_mem * r->std + .499);
if (r->high < r->avg + opt->max_stddev_mem * r->std) r->high = (int)(r->avg + opt->max_stddev_mem * r->std + .499);
if (r->low < 1) r->low = 1;
fprintf(stderr, "[M::%s] low and high boundaries for proper pairs: (%d, %d)\n", __func__, r->low, r->high);
free(q->a);
8 changes: 7 additions & 1 deletion fastmap.c
Original file line number Diff line number Diff line change
@@ -156,8 +156,11 @@ int main_mem(int argc, char *argv[])

aux.opt = opt = mem_opt_init();
memset(&opt0, 0, sizeof(mem_opt_t));
while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:")) >= 0) {
while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:b:e:g:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
else if (c == 'b') opt->outlier_bound_mem = atof(optarg);
else if (c == 'e') opt->mapping_bound_mem = atof(optarg);
else if (c == 'g') opt->max_stddev_mem = atof(optarg);
else if (c == '1') no_mt_io = 1;
else if (c == 'x') mode = optarg;
else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
@@ -267,6 +270,9 @@ int main_mem(int argc, char *argv[])
fprintf(stderr, "Usage: bwa mem [options] <idxbase> <in1.fq> [in2.fq]\n\n");
fprintf(stderr, "Algorithm options:\n\n");
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
fprintf(stderr, " -b FLOAT controls how to calculate low and high boundaries for computing mean and std.dev [%f]\n", opt->outlier_bound_mem);
fprintf(stderr, " -e FLOAT mapping bound mem. Controls how to calculate low and high boundaries for proper pairs. [%f]\n", opt->mapping_bound_mem);
fprintf(stderr, " -g FLOAT max stddev mem. Controls how to calculate low and high boundaries for proper pairs. [%f]\n", opt->max_stddev_mem);
fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len);
fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w);
fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop);