Skip to content

Commit

Permalink
New command to recruit a subset of reads from rescued reads. New functionality to remove tangles from de Bruijn graphs (#27)
Browse files Browse the repository at this point in the history
  • Loading branch information
kvg authored Oct 8, 2024
1 parent a311449 commit a57b978
Show file tree
Hide file tree
Showing 5 changed files with 1,293 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/hidive/src/correct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,14 @@ pub fn start(
.score_kmers(model_path)
.collapse()
.clean_color_specific_paths(1, 0.2)
.clean_tangles(1, 100, 0.2)
.clean_branches(0.01)
.clean_tips(3*kmer_size, 0.01)
.clean_contigs(100)
.build_links(&all_lr_seqs);
// .build_links(&all_lr_seqs);
;

skydive::elog!("Built MLdBG with {} k-mers.", m.kmers.len());

// let superbubbles = m.identify_superbubbles();
// for superbubble in superbubbles {
Expand Down
34 changes: 34 additions & 0 deletions src/hidive/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ mod fetch;
mod filter;
mod impute;
mod rescue;
mod recruit;
mod train;
mod trim;

Expand Down Expand Up @@ -162,6 +163,30 @@ enum Commands {
seq_paths: Vec<PathBuf>,
},

// `Recruit` subcommand variant. The matching arm in `main` destructures these
// five fields and forwards them to `recruit::start`.
// NOTE(review): the `///` lines on clap-annotated items are presumably surfaced
// as --help text, so treat them as user-facing strings rather than plain comments.
/// From rescued reads, recruit subset reads that are similar to input long reads.
#[clap(arg_required_else_help = true)]
Recruit {
// Falls back to /dev/stdout when no output flag is supplied.
/// Output path for FASTA file with reads spanning locus of interest.
#[clap(short, long, value_parser, default_value = "/dev/stdout")]
output: PathBuf,

// Default comes from DEFAULT_KMER_SIZE, which is defined outside this view.
/// Kmer-size.
#[clap(short, long, value_parser, default_value_t = DEFAULT_KMER_SIZE)]
kmer_size: usize,

// Percentage threshold, defaulting to 70.
// NOTE(review): nothing in this declaration bounds the value to 0..=100, and the
// dispatch arm in `main` rebinds it to a local named `min_kmers`, which drops
// the "pct" hint. Confirm `recruit::start` interprets it as a percentage.
/// Minimum percentage of k-mers to require before examining a read more carefully.
#[clap(short, long, value_parser, default_value_t = 70)]
min_kmers_pct: usize,

// Mandatory (`required = true`); collected into a Vec, so more than one path may be given.
/// FASTA files with reads to use as a filter for finding more reads.
#[clap(short, long, value_parser, required = true)]
fasta_paths: Vec<PathBuf>,

// Mandatory; no short/long flag is declared here, so these are presumably
// taken as positional arguments.
/// FASTA files from which to extract relevant sequences.
#[clap(required = true, value_parser)]
seq_paths: Vec<PathBuf>,
},

/// Optionally further filter rescued reads to those most closely matching a long-read draft assembly.
#[clap(arg_required_else_help = true)]
Filter {
Expand Down Expand Up @@ -382,6 +407,15 @@ fn main() {
} => {
rescue::start(&output, kmer_size, min_kmers, &contigs, &fasta_paths, &seq_paths);
}
Commands::Recruit {
output,
kmer_size,
min_kmers_pct: min_kmers,
fasta_paths,
seq_paths,
} => {
recruit::start(&output, kmer_size, min_kmers, &fasta_paths, &seq_paths);
}
Commands::Filter {
output,
gfa_path,
Expand Down
Loading

0 comments on commit a57b978

Please sign in to comment.