Skip to content

Commit

Permalink
change preset naming
Browse files Browse the repository at this point in the history
  • Loading branch information
ruanjue committed Apr 17, 2019
1 parent a8c7ee3 commit f460eee
Showing 1 changed file with 46 additions and 37 deletions.
83 changes: 46 additions & 37 deletions wtdbg.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,23 +127,23 @@ int usage(int level){
" -t <int> Number of threads, 0 for all cores, [4]\n"
" -f Force to overwrite output files\n"
" -x <string> Presets, comma delimited, []\n"
" rsII/rs: -p 21 -S 4 -s 0.05 -L 5000\n"
" sequel/sq\n"
" nanopore/ont:\n"
" (genome size < 1G) -p 0 -k 15 -AS 2 -s 0.05 -L 5000\n"
" (genome size >= 1G) -p 19 -AS 2 -s 0.05 -L 5000\n"
" corrected/ccs: -p 21 -k 0 -AS 4 -K 0.05 -s 0.5\n"
" Example: '-e 3 -x ont -S 1' in parsing order, -e will be 3, -S will be 1\n"
" preset1/rsII/rs: -p 21 -S 4 -s 0.05 -L 5000\n"
" preset2: -p 0 -k 15 -AS 2 -s 0.05 -L 5000\n"
" preset3: -p 19 -AS 2 -s 0.05 -L 5000\n"
" sequel/sq\n"
" nanopore/ont:\n"
" (genome size < 1G: preset2) -p 0 -k 15 -AS 2 -s 0.05 -L 5000\n"
" (genome size >= 1G: preset3) -p 19 -AS 2 -s 0.05 -L 5000\n"
" preset4/corrected/ccs: -p 21 -k 0 -AS 4 -K 0.05 -s 0.5\n"
" -g <number> Approximate genome size (k/m/g suffix allowed) [0]\n"
" -X <float> Choose the best <float> depth from input reads(effective with -g) [50]\n"
" -X <float> Choose the best <float> depth from input reads(effective with -g) [50.0]\n"
" -L <int> Choose the longest subread and drop reads shorter than <int> (5000 recommended for PacBio) [0]\n"
" Negative integer indicate tidying read names too, e.g. -5000.\n"
" -k <int> Kmer fsize, 0 <= k <= 25, [0]\n"
" -p <int> Kmer psize, 0 <= p <= 25, [21]\n"
" k + p <= 25, seed is <k-mer>+<p-homopolymer-compressed>\n"
" -K <float> Filter high frequency kmers, maybe repetitive, [1000.05]\n"
" >= 1000 and indexing >= (1 - 0.05) * total_kmers_count\n"
" -E <int> Min kmer frequency, [2]\n"
" -S <float> Subsampling kmers, 1/(<-S>) kmers are indexed, [4.00]\n"
" -S is very useful in saving memeory and speeding up\n"
" please note that subsampling kmers will have less matched length\n"
Expand Down Expand Up @@ -180,8 +180,8 @@ int usage(int level){
" See -p 21\n"
" --kmer-depth-max <float>\n"
" See -K 1000.05\n"
" --kmer-depth-min <int>\n"
" See -E\n"
" -E, --kmer-depth-min <int>\n"
" Min kmer frequency, [2]\n"
//" --kmer-depth-min-filter\n"
//" See -F\n"
//" `wtdbg` uses a 4 Gbytes array to counting the occurence (0-3) of kmers in the way of counting-bloom-filter. It will reduce memory space largely\n"
Expand Down Expand Up @@ -367,7 +367,7 @@ int main(int argc, char **argv){
FILE *evtlog;
char *prefix, *dump_seqs, *load_seqs, *dump_kbm, *load_kbm, *load_nodes, *load_clips;
char regtag[14];
int len, tag_size, asyn_read, seq_type;
int len, tag_size, asyn_read, preset;
u8i tot_bp, cnt, bub, tip, rep, yarn, max_bp, max_idx_bp, nfix, opt_flags;
uint32_t i, j, k;
int c, opt_idx, ncpu, only_fix, realign, node_cov, max_node_cov, exp_node_cov, min_bins, edge_cov, store_low_cov_edge, reglen, regovl, bub_step, tip_step, rep_step;
Expand All @@ -384,7 +384,7 @@ int main(int argc, char **argv){
mem_stingy = 0;
tidy_reads = 0;
tidy_rdtag = -1;
seq_type = 0; // 0, unknown; 1: rs; 2: sq; 3: ont; 4: ccs
preset = 0;
genome_size = 0;
genome_depx = 50.0;
num_index = 1;
Expand Down Expand Up @@ -488,15 +488,18 @@ int main(int argc, char **argv){
if(KBM_LOG){
fprintf(KBM_LOGF, " -- Preset: '%s' --", beg); fflush(KBM_LOGF);
}
if(strcasecmp(beg, "rs") == 0 || strcasecmp(beg, "rsII") == 0){
seq_type = 1;
if(strcasecmp(beg, "preset1") == 0 || strcasecmp(beg, "rs") == 0 || strcasecmp(beg, "rsII") == 0){
preset = 1;
} else if(strcasecmp(beg, "preset2") == 0){
preset = 2;
} else if(strcasecmp(beg, "preset3") == 0){
preset = 3;
} else if(strcasecmp(beg, "sq") == 0 || strcasecmp(beg, "sequel") == 0){
seq_type = 2;
preset = -1;
} else if(strcasecmp(beg, "ont") == 0 || strcasecmp(beg, "nanopore") == 0){
seq_type = 3;
} else if(strcasecmp(beg, "ccs") == 0 || strcasecmp(beg, "corrected") == 0){
seq_type = 4;
tidy_reads = 5000;
preset = -1;
} else if(strcasecmp(beg, "preset4") == 0 || strcasecmp(beg, "ccs") == 0 || strcasecmp(beg, "corrected") == 0){
preset = 4;
} else {
fprintf(stderr, " ** ERROR: cannot recognize '%s' in '-x %s'\n", beg, optarg);
exit(1);
Expand Down Expand Up @@ -620,7 +623,14 @@ int main(int argc, char **argv){
return usage(-1);
}
if(max_idx_bp == 0) max_idx_bp = 0xFFFFFFFFFFFFFFFFLLU;
switch(seq_type){
if(preset == -1){
if(genome_size && genome_size < 1000000000LLU){
preset = 2;
} else {
preset = 3;
}
}
switch(preset){
case 1:
if(!(opt_flags & (1 << 1))) par->ksize = 0;
if(!(opt_flags & (1 << 0))) par->psize = 21;
Expand All @@ -630,22 +640,20 @@ int main(int argc, char **argv){
if(!(opt_flags & (1 << 4))) tidy_reads = 5000;
break;
case 2:
if(!(opt_flags & (1 << 1))) par->ksize = 15;
if(!(opt_flags & (1 << 0))) par->psize = 0;
if(!(opt_flags & (1 << 2))) par->kmer_mod = 2 * KBM_N_HASH;
if(!(opt_flags & (1 << 3))) par->min_sim = 0.05;
if(!(opt_flags & (1 << 5))) par->skip_contained = 0;
if(!(opt_flags & (1 << 4))) tidy_reads = 5000;
break;
case 3:
if(genome_size && genome_size < 1000000000LLU){
if(!(opt_flags & (1 << 1))) par->ksize = 15;
if(!(opt_flags & (1 << 0))) par->psize = 0;
if(!(opt_flags & (1 << 2))) par->kmer_mod = 2 * KBM_N_HASH;
if(!(opt_flags & (1 << 3))) par->min_sim = 0.05;
if(!(opt_flags & (1 << 5))) par->skip_contained = 0;
if(!(opt_flags & (1 << 4))) tidy_reads = 5000;
} else {
if(!(opt_flags & (1 << 1))) par->ksize = 0;
if(!(opt_flags & (1 << 0))) par->psize = 19;
if(!(opt_flags & (1 << 2))) par->kmer_mod = 2 * KBM_N_HASH;
if(!(opt_flags & (1 << 3))) par->min_sim = 0.05;
if(!(opt_flags & (1 << 5))) par->skip_contained = 0;
if(!(opt_flags & (1 << 4))) tidy_reads = 5000;
}
if(!(opt_flags & (1 << 1))) par->ksize = 0;
if(!(opt_flags & (1 << 0))) par->psize = 19;
if(!(opt_flags & (1 << 2))) par->kmer_mod = 2 * KBM_N_HASH;
if(!(opt_flags & (1 << 3))) par->min_sim = 0.05;
if(!(opt_flags & (1 << 5))) par->skip_contained = 0;
if(!(opt_flags & (1 << 4))) tidy_reads = 5000;
break;
case 4:
if(!(opt_flags & (1 << 1))) par->ksize = 0;
Expand All @@ -655,6 +663,7 @@ int main(int argc, char **argv){
if(!(opt_flags & (1 << 5))) par->skip_contained = 0;
if(!(opt_flags & (1 << 6))){ par->kmax = 0; par->ktop = 0.05; }
//if(!(opt_flags & (1 << 4))) tidy_reads = 5000;
break;
}
if(par->ksize + par->psize > KBM_MAX_KSIZE){
fprintf(stderr, " -- Invalid kmer size %d+%d=%d > %d in %s -- %s:%d --\n", par->ksize, par->psize, par->ksize + par->psize, KBM_MAX_KSIZE, __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
Expand Down Expand Up @@ -847,7 +856,7 @@ int main(int argc, char **argv){
FILE *dump;
fprintf(KBM_LOGF, "[%s] dump kbm-index (only seqs) to %s ...", date(), dump_seqs); fflush(KBM_LOGF);
dump = open_file_for_write(dump_seqs, NULL, 1);
mem_dump_obj_file(g->kbm, 1, &kbm_obj_desc, 1, 0, dump);
mem_dump_obj_file(kbm, 1, &kbm_obj_desc, 1, 0, dump);
fclose(dump);
fprintf(KBM_LOGF, " Done\n"); fflush(KBM_LOGF);
}
Expand Down

0 comments on commit f460eee

Please sign in to comment.