Fix minor lint warnings and improve code quality (#25)
* Optimize imports and remove unused imports
* Comment out unused variables or prefix them with an underscore
* Comment out unused functions
* Use a snake_case name for `anchorseq`
* Clippy: add `#[must_use]` to functions
* Fix: disambiguate the `to_owned` method call in `graph.insert` to resolve a "multiple applicable items" error
* Fix: correct function signatures to resolve Clippy lint errors
* Fix: change an argument to be passed by reference instead of by value
* Fix "boolean to int conversion using if" in the `in_degree` and `out_degree` functions
* Fix `inefficient_to_string`: calling `to_string` on `&&std::string::String`
* Fix `cast_possible_truncation`: casting `usize` to `u8` may truncate the value
* Add doc comments for functions that may panic (cargo lint)
* Fix "item in documentation is missing backticks" (cargo lint)
* Fix "method/function could have a `#[must_use]` attribute" (cargo lint)
* Fix "variables can be used directly in the format string" (cargo lint)
* Fix "needless use of `for_each`" (cargo lint)
* Fix "avoid wildcard imports" (cargo lint)
* Fix "avoid implicitly cloning" (cargo lint)
* Fix "docs for function returning `Result` missing `# Errors` section" (cargo lint)
* Fix "test function passing of variable type" (cargo lint)
* Fix: remove needless `enumerate` calls, unused imports, and unused variables (cargo lint)
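
Editor's note: a minimal before/after sketch of a few of the lints named above. This is an illustration, not hidive code; `in_degree`, `small_index`, and `describe` are made-up names.

// Hypothetical examples of the Clippy fixes listed above.

// `must_use_candidate`: warn callers who silently drop the return value.
#[must_use]
pub fn in_degree(has_incoming: bool) -> u8 {
    // Bool-to-int conversion: `u8::from(b)` replaces `if b { 1 } else { 0 }`.
    u8::from(has_incoming)
}

// `cast_possible_truncation`: `as u8` may silently truncate a `usize`,
// so convert fallibly and make the failure mode explicit.
#[must_use]
pub fn small_index(index: usize) -> u8 {
    u8::try_from(index).expect("index must fit in a u8")
}

// `uninlined_format_args`: variables can be used directly in the format string.
#[must_use]
pub fn describe(label: &str) -> String {
    format!("anchor {label}") // rather than format!("anchor {}", label)
}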

---------

Co-authored-by: bshifaw <[email protected]>
bshifaw authored Oct 22, 2024
1 parent c066131 commit 26b9c3a
Showing 27 changed files with 1,008 additions and 325 deletions.
63 changes: 30 additions & 33 deletions src/hidive/src/build.rs
@@ -2,21 +2,18 @@
use std::collections::HashSet;
use std::path::PathBuf;

use bio::io::fasta::{IndexedReader, Reader, Record};
use bio::io::fasta::{Reader, Record};

use flate2::read::GzDecoder;
use serde_json::Value;

// Import the skydive module, which contains the necessary functions for building graphs
use skydive;

use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
use std::io::{self, BufRead, BufReader, Write};
use std::sync::Mutex;

pub fn get_reference_kmer_profile(ref_hla: &str, k: usize) -> Vec<String> {
let mut ref_kmer_profile: HashMap<String, i32> = HashMap::new();
@@ -191,7 +188,7 @@ pub fn get_final_anchor(
let mut last_position = 0;

for anchor in anchornames {
let mut position = anchor_info[anchor].pos;
let position = anchor_info[anchor].pos;
if position > last_position + k {
anchor_unadjacent_list.push(anchor.to_string());
last_position = position;
@@ -223,10 +220,10 @@ pub fn mapping_info(
let anchor_rev = reverse_complement(&anchor_seq);
position_dict
.entry(anchor_seq.clone())
.or_insert_with(|| Vec::new());
.or_default();
position_dict
.entry(anchor_rev.clone())
.or_insert_with(|| Vec::new());
.or_default();
}

for i in 0..contig.len() - k + 1 {
@@ -258,7 +255,7 @@ pub fn construct_edges(
contig: String,
contigname: String,
sample: String,
Anchorseq: &HashMap<String, String>,
anchorseq: &HashMap<String, String>,
) -> EdgeInfo {
let src_seq: String;
let mut pr = false;
@@ -270,12 +267,12 @@
pr = true;
} else {
src_seq = contig.chars().skip(src_pos).take(k).collect();
src = match Anchorseq.get(&src_seq) {
src = match anchorseq.get(&src_seq) {
Some(value) => value.clone(),
None => {
let reversed_src_seq = reverse_complement(&src_seq);
pr = true;
Anchorseq
anchorseq
.get(&reversed_src_seq)
.unwrap_or(&"".to_string())
.clone()
@@ -292,12 +289,12 @@
sr = true;
} else {
dst_seq = contig.chars().skip(dst_pos).take(k).collect::<String>();
dst = match Anchorseq.get(&dst_seq) {
dst = match anchorseq.get(&dst_seq) {
Some(value) => value.clone(),
None => {
let reversed_dst_seq = reverse_complement(&dst_seq);
sr = true;
Anchorseq
anchorseq
.get(&reversed_dst_seq)
.unwrap_or(&"".to_string())
.clone()
@@ -323,8 +320,8 @@

EdgeInfo {
seq: edge_seq,
src: src,
dst: dst,
src,
dst,
reads: vec![contigname],
samples: vec![sample].into_iter().collect(),
}
@@ -352,7 +349,7 @@ pub fn create_edge_file(
.last()
.unwrap_or_default()
.to_string();
let (a, svs) = mapping_info(&final_anchor, contig.to_string(), k);
let (a, _svs) = mapping_info(final_anchor, contig.to_string(), k);
let mut splitposlist: Vec<_> = a.values().filter_map(|&x| x).collect();
splitposlist.sort();
let mut edgeindex = 0;
@@ -368,7 +365,7 @@
&anchorseq,
);
let src = &e.src;
let edgelist = outgoing.entry(src.clone()).or_insert_with(|| Vec::new());
let edgelist = outgoing.entry(src.clone()).or_default();
if let Some(pos) = edgelist
.iter()
.position(|edge| edge_info[edge].dst == e.dst && edge_info[edge].seq == e.seq)
@@ -396,7 +393,7 @@ pub fn create_edge_file(
&anchorseq,
);
let src = &e.src;
let edgelist = outgoing.entry(src.clone()).or_insert_with(|| Vec::new());
let edgelist = outgoing.entry(src.clone()).or_default();
if let Some(pos) = edgelist
.iter()
.position(|edge| edge_info[edge].dst == e.dst && edge_info[edge].seq == e.seq)
@@ -567,8 +564,8 @@ impl GraphicalGenome {
Ok(GraphicalGenome {
anchor: anchor_dict,
edges: edge_dict,
outgoing: outgoing,
incoming: incoming,
outgoing,
incoming,
})
}
}
@@ -596,7 +593,7 @@ impl FindAllPathBetweenAnchors {
g: &GraphicalGenome,
start: &str,
end: &str,
mut sofar: Vec<String>,
sofar: Vec<String>,
depth: usize,
readset: HashSet<String>,
) {
@@ -651,12 +648,12 @@ pub fn reconstruct_path_seq(graph: &GraphicalGenome, path: &[String]) -> String
for item in path {
if item.starts_with('A') {
if let Some(anchor) = graph.anchor.get(item) {
seq += &anchor["seq"].as_str().unwrap_or_default(); // Assuming `anchor` is a HashMap and "seq" is a key
seq += anchor["seq"].as_str().unwrap_or_default(); // Assuming `anchor` is a HashMap and "seq" is a key
// println!("{:?}", anchor["seq"].as_str().unwrap_or_default());
}
} else if item.starts_with("E") {
if let Some(edge) = graph.edges.get(item) {
seq += &edge["seq"].as_str().unwrap_or_default(); // Assuming `edges` is a HashMap and "seq" is a key
seq += edge["seq"].as_str().unwrap_or_default(); // Assuming `edges` is a HashMap and "seq" is a key
}
}
}
@@ -665,7 +662,7 @@

pub fn find_all_reads(graph: &GraphicalGenome) -> HashSet<String> {
let mut read_sets = HashSet::new();
for (item, edge) in &graph.edges {
for (_item, edge) in &graph.edges {
if let Some(readlist) = edge.get("reads").and_then(|r| r.as_array()) {
for read in readlist {
if let Some(read_str) = read.as_str() {
@@ -738,15 +735,15 @@ impl GetSeriesParallelGraph {
let mut node = Self::find_furthest_node(&node_candidate, subgraph, &start_node);
nodelist.push(node.clone());

while node != end_node && node != "" {
while node != end_node && !node.is_empty() {
let edgelist = &subgraph.outgoing[&node];
let mut node_candidate: Vec<String> = Vec::new();
for edge in edgelist {
let nodelist = &subgraph.outgoing[edge];
if nodelist.contains(&"SINK".to_string()) {
continue;
}
if nodelist[0] != ""
if !nodelist[0].is_empty()
&& subgraph.anchor.contains_key(&nodelist[0])
&& subgraph.outgoing.contains_key(&nodelist[0])
{
@@ -788,11 +785,11 @@ impl GetSeriesParallelGraph {
FindAllPathBetweenAnchors::new(subgraph, start_node, end_node, initial_set.clone());
let mut index = 0;
for (p, rs) in path.subpath.iter() {
let edgename = format!(
"E{:05}.{:04}",
start_node[1..].parse::<usize>().unwrap(),
index
);
// let edgename = format!(
// "E{:05}.{:04}",
// start_node[1..].parse::<usize>().unwrap(),
// index
// );
let seq = reconstruct_path_seq(subgraph, &p[1..p.len() - 1]);
let edgelist = outgoing_dict
.get(start_node)
@@ -966,7 +963,7 @@ pub fn start(output: &PathBuf, k: usize, fasta_path: &PathBuf, reference_name: S
let anchors = get_anchors(&anchorlist, &position_dict, k, &stem);

let final_anchor = get_final_anchor(&anchors, k);
let (edge_info, outgoing) = create_edge_file(&hla_seq, &final_anchor, k);
let (edge_info, _outgoing) = create_edge_file(&hla_seq, &final_anchor, k);

let dereferenced_final_anchor = final_anchor
.iter()
@@ -977,7 +974,7 @@
let filtered_edges = filter_undersupported_edges(&edge_info, &stem, 4);

// Write final graph to disk.
write_gfa(
let _ = write_gfa(
&dereferenced_final_anchor,
&filtered_edges,
output.to_str().unwrap(),
@@ -989,7 +986,7 @@
let mut sp_graph = GetSeriesParallelGraph::new(&graph);
let outputfilename_str = output.with_extension("sp.gfa");
// println!("{:?}", outputfilename_str);
write_graph_from_graph(outputfilename_str.to_str().unwrap(), &mut sp_graph);
let _ = write_graph_from_graph(outputfilename_str.to_str().unwrap(), &mut sp_graph);
println!("{:?}", outputfilename_str);
// write_graph_from_graph("HLA-A.sp.gfa", &mut sp_graph);
}
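
Editor's note: the repeated `.or_default()` change in this file uses the standard `HashMap` entry API. A minimal sketch of the idiom, with illustrative names (`record_edge`, `outgoing`):

use std::collections::HashMap;

// Sketch of the `entry(...).or_default()` idiom adopted throughout build.rs.
fn record_edge(outgoing: &mut HashMap<String, Vec<String>>, src: &str, edge: String) {
    // `or_default()` inserts `Vec::default()` (an empty Vec) only when the key
    // is absent; it is equivalent to `or_insert_with(|| Vec::new())`, but shorter.
    outgoing.entry(src.to_string()).or_default().push(edge);
}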
4 changes: 2 additions & 2 deletions src/hidive/src/cluster.rs
@@ -1,5 +1,5 @@
// Import necessary standard library modules
use std::collections::{HashMap, HashSet};
use std::collections::HashSet;
use std::io::Write;
use std::path::PathBuf;

@@ -20,8 +20,8 @@ pub fn start(output: &PathBuf, k: usize, jaccard_threshold: f64, fasta_path: &Pa
let mut sample_names = Vec::new();
for record in &all_reads {
let id = record.id().to_string();
let v: Vec<String> = id.split('|').map(|s| s.to_string()).collect();

//let v: Vec<String> = id.split('|').map(|s| s.to_string()).collect();
// sample_names.push(v[1].clone());
sample_names.push(id);
}
14 changes: 7 additions & 7 deletions src/hidive/src/coassemble.rs
@@ -120,7 +120,7 @@ fn create_fully_phased_haplotypes(lr_msas: &Vec<String>, h1: &Vec<u8>) -> (Strin
let combined_base_counts = allele_counts(lr_msas, index1, index1+1);
// let bases = allele_indices(lr_msas, index1, index1+1);

if combined_base_counts.len() == 0 {
if combined_base_counts.is_empty() {
index1 += 1;
} else if combined_base_counts.len() == 1 {
let base = combined_base_counts.keys().next().unwrap();
@@ -145,8 +145,8 @@

if allele_counts.len() == 1 {
for (allele, _) in allele_counts {
hap1.extend(allele.chars());
hap2.extend(allele.chars());
hap1.push_str(&allele);
hap2.push_str(&allele);
}
} else {
// let alleles = get_allele_indices(lr_msas, sr_msas, index1, index2);
@@ -155,9 +155,9 @@
let mut phase = h1[hap_index] == 1;
for (allele, _) in allele_counts {
if !phase {
hap1.extend(allele.chars());
hap1.push_str(&allele);
} else {
hap2.extend(allele.chars());
hap2.push_str(&allele);
}

phase = !phase;
@@ -225,7 +225,7 @@ fn create_read_allele_matrix(lr_msas: &Vec<String>) -> Vec<BTreeMap<usize, Strin
alleles.iter().enumerate().for_each(|(i, a)| {
if *a == *allele {
// column.insert(i, allele.clone());
column.insert(i, String::from(allele_index.to_string()));
column.insert(i, allele_index.to_string());
}
});

@@ -254,7 +254,7 @@ fn allele_indices(lr_msas: &Vec<String>, index1: usize, index2: usize) -> Vec<St
fn allele_counts(lr_msas: &Vec<String>, index1: usize, index2: usize) -> BTreeMap<String, i32> {
let combined_allele_counts = lr_msas.iter()
.map(|msa| msa[index1..index2].to_string().replace(" ", ""))
.filter(|allele| allele.len() > 0)
.filter(|allele| !allele.is_empty())
.fold(BTreeMap::new(), |mut counts, base| {
*counts.entry(base).or_insert(0) += 1;
counts
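
Editor's note: the `push_str` changes above replace per-character appends flagged by clippy's `string_extend_chars` lint. A tiny sketch with illustrative data:

fn main() {
    // Appending a `&str` directly replaces the char-by-char
    // `hap1.extend(allele.chars())` form.
    let allele = String::from("ACGT"); // illustrative data
    let mut hap1 = String::new();
    hap1.push_str(&allele); // was: hap1.extend(allele.chars());
    assert_eq!(hap1, "ACGT");
}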
2 changes: 1 addition & 1 deletion src/hidive/src/correct.rs
@@ -56,7 +56,7 @@ pub fn start(

skydive::elog!("Writing reads to {}", output.display());

let mut fa_file = File::create(&output).unwrap();
let mut fa_file = File::create(output).unwrap();
for (i, corrected_seq) in corrected_seqs.iter().enumerate() {
let _ = writeln!(fa_file, ">corrected_{}\n{}", i, String::from_utf8(corrected_seq.clone()).unwrap());
}
10 changes: 5 additions & 5 deletions src/hidive/src/eval_model.rs
@@ -16,9 +16,9 @@ pub fn start(
truth_seq_paths: &Vec<PathBuf>,
model_path: &PathBuf,
) {
let long_read_seq_urls = skydive::parse::parse_file_names(&long_read_seq_paths);
let short_read_seq_urls = skydive::parse::parse_file_names(&short_read_seq_paths);
let truth_seq_urls = skydive::parse::parse_file_names(&truth_seq_paths);
let long_read_seq_urls = skydive::parse::parse_file_names(long_read_seq_paths);
let short_read_seq_urls = skydive::parse::parse_file_names(short_read_seq_paths);
let truth_seq_urls = skydive::parse::parse_file_names(truth_seq_paths);


// Read all long reads.
@@ -42,15 +42,15 @@

// load model
skydive::elog!("Loading GBDT model from {}...", model_path.to_str().unwrap());
let mut gbdt = GBDT::load_model(model_path.to_str().unwrap()).expect("Unable to load model");
let gbdt = GBDT::load_model(model_path.to_str().unwrap()).expect("Unable to load model");

// Prepare test data.
let test_kmers = l1
.kmers
.keys()
.chain(s1.kmers.keys())
.chain(t1.kmers.keys());
let mut test_data: DataVec = create_dataset_for_model(
let test_data: DataVec = create_dataset_for_model(
test_kmers,
&lr_distances,
&sr_distances,
3 changes: 1 addition & 2 deletions src/hidive/src/fetch.rs
@@ -5,7 +5,6 @@ use std::path::PathBuf;
use path_absolutize::Absolutize;

// Import the skydive module, which contains the necessary functions for staging data
use skydive;

/// Starts the data fetching process.
///
@@ -28,7 +27,7 @@ pub fn start(output: &PathBuf, loci_list: &Vec<String>, padding: u64, seq_paths:

// Call the stage_data function from the skydive module to process and stage the data
skydive::elog!("Fetching data...");
let r = skydive::stage::stage_data(&output, &loci, &seq_urls, false, &cache_path);
let r = skydive::stage::stage_data(output, &loci, &seq_urls, false, &cache_path);

match r {
Ok(n) => {
6 changes: 2 additions & 4 deletions src/hidive/src/filter.rs
@@ -52,7 +52,7 @@ pub fn start(output: &PathBuf, gfa_path: &PathBuf, short_read_fasta_paths: &Vec<
let mut filtered_sr_seqs: Vec<Vec<u8>> = Vec::new();
for short_read_seq_url in &short_read_seq_urls {
let basename = skydive::utils::basename_without_extension(
&short_read_seq_url,
short_read_seq_url,
&[".fasta.gz", ".fa.gz", ".fasta", ".fa"],
);
let fasta_path = short_read_seq_url.to_file_path().unwrap();
@@ -69,9 +69,7 @@ pub fn start(output: &PathBuf, gfa_path: &PathBuf, short_read_fasta_paths: &Vec<
let fasta_reader = bio::io::fasta::Reader::new(reader);
let all_reads = fasta_reader.records().flatten().collect::<Vec<_>>();

let progress_bar = skydive::utils::default_unbounded_progress_bar(format!(
"Filtering short reads (0 retained)",
));
let progress_bar = skydive::utils::default_unbounded_progress_bar("Filtering short reads (0 retained)".to_string());

// Create some thread-safe counters.
let found_items = Arc::new(AtomicUsize::new(0));
1 change: 0 additions & 1 deletion src/hidive/src/impute.rs
@@ -1,6 +1,5 @@
use std::path::PathBuf;

use skydive;

pub fn start(output: &PathBuf, graph: &PathBuf) {
println!("The answer is {:?} {:?}!", output, graph)
8 changes: 4 additions & 4 deletions src/hidive/src/main.rs
@@ -525,15 +525,15 @@ fn elapsed_time(start_time: std::time::Instant) -> String {
let elapsed_time = end_time.duration_since(start_time);

let elapsed_secs = elapsed_time.as_secs_f64();
let elapsed_str = if elapsed_secs < 60.0 {


if elapsed_secs < 60.0 {
format!("{:.2} seconds", elapsed_secs)
} else if elapsed_secs < 3600.0 {
format!("{:.2} minutes", elapsed_secs / 60.0)
} else if elapsed_secs < 86400.0 {
format!("{:.2} hours", elapsed_secs / 3600.0)
} else {
format!("{:.2} days", elapsed_secs / 86400.0)
};

elapsed_str
}
}
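
Editor's note: the `elapsed_time` diff above drops a `let`-and-return binding in favor of a tail expression. A compact sketch of the same shape; the function name and labels are illustrative:

// The if/else chain is returned directly; no intermediate
// `let elapsed_str = ...; elapsed_str` binding is needed.
fn format_elapsed(elapsed_secs: f64) -> String {
    if elapsed_secs < 60.0 {
        format!("{elapsed_secs:.2} seconds")
    } else if elapsed_secs < 3600.0 {
        format!("{:.2} minutes", elapsed_secs / 60.0)
    } else {
        format!("{:.2} hours", elapsed_secs / 3600.0)
    }
}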