diff --git a/src/call_genotypes.rs b/src/call_genotypes.rs index cb03950..6e6ea74 100644 --- a/src/call_genotypes.rs +++ b/src/call_genotypes.rs @@ -197,7 +197,7 @@ pub fn call_genotypes_no_haplotypes( &var.alleles, max_p_miscall, ) - .chain_err(|| "Error calculating genotype posteriors for haplotype-free genotyping")?; + .chain_err(|| "Error calculating genotype posteriors for haplotype-free genotyping")?; // get the genotype with maximum genotype posterior let (max_g, max_post) = posts.max_genotype_post(false, false); @@ -415,7 +415,8 @@ pub fn call_genotypes_with_haplotypes( if var.alleles.len() == 2 && (var.genotype == Genotype(0, 1) || var.genotype == Genotype(1, 0)) && var.alleles[0].len() == 1 - && var.alleles[1].len() == 1 { + && var.alleles[1].len() == 1 + { var_phased[i] = true; if var.genotype == Genotype(0, 1) { @@ -597,8 +598,10 @@ pub fn call_genotypes_with_haplotypes( } // get the value of the read likelihood given each haplotype - let (mut p_read_h0, mut p_read_h1) = (p_read_hap[0][call.frag_ix as usize], - p_read_hap[1][call.frag_ix as usize]); + let (mut p_read_h0, mut p_read_h1) = ( + p_read_hap[0][call.frag_ix as usize], + p_read_hap[1][call.frag_ix as usize], + ); if var_phased[v as usize] { // for each haplotype allele @@ -677,10 +680,10 @@ pub fn call_genotypes_with_haplotypes( if var_phased[v as usize] { for &(frag_ix, p_read_h0, p_read_h1) in &p_read_lst_genotype[max_g.0 as usize][max_g.1 as usize] - { - p_read_hap[0][frag_ix] = p_read_h0; - p_read_hap[1][frag_ix] = p_read_h1; - } + { + p_read_hap[0][frag_ix] = p_read_h0; + p_read_hap[1][frag_ix] = p_read_h1; + } } } @@ -790,7 +793,7 @@ pub fn call_genotypes_with_haplotypes( "{}.{}.haplotype_genotype_iteration.vcf", program_step, hapcut2_iter ) - .to_owned(); + .to_owned(); print_variant_debug( varlist, &interval, @@ -826,11 +829,11 @@ mod tests { fn test_generate_fragcall_pileup() { let fcall = |f_ix, v_ix, a| { FragCall { - frag_ix: f_ix, // index into fragment list - var_ix: v_ix, // index into variant list - allele: a, // allele call + frag_ix: f_ix, // index into fragment list + var_ix: v_ix, // index into variant list + allele: a, // allele call qual: LogProb::from(Prob(0.01)), // LogProb probability the call is an error - one_minus_qual: LogProb::from(Prob(0.99)) + one_minus_qual: LogProb::from(Prob(0.99)), } }; let p50 = LogProb::from(Prob(0.5)); diff --git a/src/call_potential_snvs.rs b/src/call_potential_snvs.rs index ab1e98c..ad74fc3 100644 --- a/src/call_potential_snvs.rs +++ b/src/call_potential_snvs.rs @@ -257,7 +257,7 @@ pub fn call_potential_snvs( 'C' | 'c' => 1, 'G' | 'g' => 2, 'T' | 't' => 3, - _ => 4 + _ => 4, }; counts[b] += 1; diff --git a/src/extract_fragments.rs b/src/extract_fragments.rs index c40a4ec..5ea8206 100644 --- a/src/extract_fragments.rs +++ b/src/extract_fragments.rs @@ -32,10 +32,10 @@ use rust_htslib::bam::record::Cigar; use rust_htslib::bam::record::CigarStringView; use rust_htslib::bam::record::Record; use rust_htslib::bam::Read; -use util::*; -use variants_and_fragments::*; use std::u32; use std::usize; +use util::*; +use variants_and_fragments::*; static VERBOSE: bool = false; static IGNORE_INDEL_ONLY_CLUSTERS: bool = false; @@ -66,7 +66,7 @@ pub struct ExtractFragmentParameters { pub max_cigar_indel: usize, /// whether or not to store the read id. /// we store the read ID if we'll be separating reads by haplotype and otherwise we don't - pub store_read_id: bool + pub store_read_id: bool, } /// an extension of the rust-htslib cigar representation that has the cigar operation and length as @@ -794,7 +794,7 @@ fn extract_var_cluster( var_ix: var_cluster[v as usize].ix, allele: best_allele, qual: qual, - one_minus_qual: LogProb::ln_one_minus_exp(&qual) + one_minus_qual: LogProb::ln_one_minus_exp(&qual), }); } @@ -830,9 +830,8 @@ pub fn extract_fragment( id: Some(id), calls: vec![], // ln(0.5) stored as f16 for compactness - p_read_hap: [LogProb::from(Prob(0.5)), - LogProb::from(Prob(0.5))], - reverse_strand: bam_record.is_reverse() + p_read_hap: [LogProb::from(Prob(0.5)), LogProb::from(Prob(0.5))], + reverse_strand: bam_record.is_reverse(), }; if bam_record.is_quality_check_failed() diff --git a/src/genotype_probs.rs b/src/genotype_probs.rs index d900a72..dc95f9c 100644 --- a/src/genotype_probs.rs +++ b/src/genotype_probs.rs @@ -47,7 +47,6 @@ impl GenotypeProbs { for i in 0..self.n_alleles() { for j in 0..self.n_alleles() { - let post: LogProb = self.tab[i][j]; if post > max_post { diff --git a/src/haplotype_assembly.rs b/src/haplotype_assembly.rs index 0681293..864092f 100644 --- a/src/haplotype_assembly.rs +++ b/src/haplotype_assembly.rs @@ -21,7 +21,6 @@ pub fn separate_fragments_by_haplotype( let mut unassigned_count = 0; for ref f in flist { - // we store p_read_hap as ln-scaled f16s to save space. need to convert back. let p_read_hap0 = LogProb(f64::from(f.p_read_hap[0])); let p_read_hap1 = LogProb(f64::from(f.p_read_hap[1])); @@ -42,7 +41,9 @@ pub fn separate_fragments_by_haplotype( unassigned_count += 1; } } - &None => {bail!("Fragment without read ID found while separating reads by haplotype.");} + &None => { + bail!("Fragment without read ID found while separating reads by haplotype."); + } } } @@ -176,8 +177,8 @@ pub fn generate_flist_buffer( line.push(' ' as u8); let fid = match &frag.id { - Some(ref fid) => {fid.clone()} - None => {frag_num.to_string()} + Some(ref fid) => fid.clone(), + None => frag_num.to_string(), }; for u in fid.clone().into_bytes() { @@ -251,7 +252,6 @@ extern "C" { ); } - pub fn call_hapcut2( frag_buffer: &Vec>, fragments: usize, @@ -334,7 +334,8 @@ pub fn calculate_mec( match var.phase_set { Some(ps) => { *block_mec.entry(ps).or_insert(0) += var.mec; - *block_total.entry(ps).or_insert(0) += var.allele_counts.iter().sum::() as usize; + *block_total.entry(ps).or_insert(0) += + var.allele_counts.iter().sum::() as usize; } None => {} } @@ -344,13 +345,13 @@ pub fn calculate_mec( match var.phase_set { Some(ps) => { var.mec_frac_block = *block_mec + .get(&ps) + .chain_err(|| "Error retrieving MEC for phase set.")? + as f64 + / *block_total .get(&ps) .chain_err(|| "Error retrieving MEC for phase set.")? - as f64 - / *block_total - .get(&ps) - .chain_err(|| "Error retrieving MEC for phase set.")? - as f64; + as f64; var.mec_frac_variant = var.mec as f64 / var.allele_counts.iter().sum::() as f64; } diff --git a/src/main.rs b/src/main.rs index 60c2ef9..a69dcf8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -374,7 +374,8 @@ fn run() -> Result<()> { let min_allele_qual: f64 = parse_nonnegative_f64(&input_args, "Min allele quality")?; let strand_bias_pvalue_cutoff: f64 = parse_nonnegative_f64(&input_args, "Strand Bias P-value cutoff")?; - let hap_assignment_qual: f64 = parse_nonnegative_f64(&input_args, "Haplotype assignment quality")?; + let hap_assignment_qual: f64 = + parse_nonnegative_f64(&input_args, "Haplotype assignment quality")?; let ll_delta: f64 = parse_positive_f64(&input_args, "Haplotype Convergence Delta")?; let potential_snv_cutoff_phred = parse_positive_f64(&input_args, "Potential SNV Cutoff")?; let potential_snv_min_alt_count: usize = @@ -528,7 +529,7 @@ fn run() -> Result<()> { variant_cluster_max_size: variant_cluster_max_size, max_window_padding, max_cigar_indel, - store_read_id + store_read_id, }; eprintln!("{} Estimating alignment parameters...", print_time()); @@ -819,14 +820,15 @@ fn run() -> Result<()> { // write BAM files for h1,h2, and unassigned match hap_bam_prefix { Some(p) => { - eprintln!( "{} Calculating fraction of reads assigned to either haplotype...", print_time() ); // h1 and h2 are hash-sets containing the qnames of the reads assigned to haplotype 1 and 2 respectively. - let (h1, h2) = - separate_fragments_by_haplotype(&flist, LogProb::from(Prob(1.0 - hap_max_p_misassign)))?; + let (h1, h2) = separate_fragments_by_haplotype( + &flist, + LogProb::from(Prob(1.0 - hap_max_p_misassign)), + )?; eprintln!( "{} Writing haplotype-assigned reads to bam files...", diff --git a/src/realignment.rs b/src/realignment.rs index f212784..da354f1 100644 --- a/src/realignment.rs +++ b/src/realignment.rs @@ -126,7 +126,6 @@ pub fn forward_algorithm_non_numerically_stable( middle_prev[j] = 0.0; } - let t = params.transition_probs; let e = params.emission_probs; @@ -143,19 +142,16 @@ pub fn forward_algorithm_non_numerically_stable( w.len() }; - if band_start == 1 { upper_curr[0] = 0.0; middle_curr[0] = 0.0; if i == 1 { lower_curr[0] = params.transition_probs.insertion_from_match } else { - lower_curr[0] = - lower_prev[0] * params.transition_probs.insertion_from_insertion; + lower_curr[0] = lower_prev[0] * params.transition_probs.insertion_from_insertion; } } - for j in band_start..(band_end + 1) { let lower_continue = lower_prev[j] * t.insertion_from_insertion; let lower_from_middle = middle_prev[j] * t.insertion_from_match; @@ -177,7 +173,7 @@ pub fn forward_algorithm_non_numerically_stable( match_emission * (middle_from_lower + middle_continue + middle_from_upper); } - for j in (band_start-1)..(band_end + 1) { + for j in (band_start - 1)..(band_end + 1) { upper_prev[j] = upper_curr[j]; middle_prev[j] = middle_curr[j]; lower_prev[j] = lower_curr[j]; @@ -185,9 +181,9 @@ pub fn forward_algorithm_non_numerically_stable( // we previously had a bug at the left boundary of the band... set these to NaN to make sure they // aren't used again if band_start >= 2 { - upper_prev[band_start-2] = f64::NAN; - middle_prev[band_start-2] = f64::NAN; - lower_prev[band_start-2] = f64::NAN; + upper_prev[band_start - 2] = f64::NAN; + middle_prev[band_start - 2] = f64::NAN; + lower_prev[band_start - 2] = f64::NAN; } upper_curr[band_start] = 0.0; @@ -245,8 +241,7 @@ pub fn forward_algorithm_numerically_stable( if i == 1 { lower_curr[0] = params.transition_probs.insertion_from_match } else { - lower_curr[0] = - lower_prev[0] + params.transition_probs.insertion_from_insertion; + lower_curr[0] = lower_prev[0] + params.transition_probs.insertion_from_insertion; } } @@ -271,7 +266,7 @@ pub fn forward_algorithm_numerically_stable( middle_curr[j] = match_emission + LogProb::ln_sum_exp(&options3); } - for j in (band_start-1)..(band_end + 1) { + for j in (band_start - 1)..(band_end + 1) { upper_prev[j] = upper_curr[j]; middle_prev[j] = middle_curr[j]; lower_prev[j] = lower_curr[j]; @@ -279,9 +274,9 @@ pub fn forward_algorithm_numerically_stable( // we previously had a bug at the left boundary of the band... set these to NaN to make sure they // aren't used again if band_start >= 2 { - upper_prev[band_start-2] = LogProb(f64::NAN); - middle_prev[band_start-2] = LogProb(f64::NAN); - lower_prev[band_start-2] = LogProb(f64::NAN); + upper_prev[band_start - 2] = LogProb(f64::NAN); + middle_prev[band_start - 2] = LogProb(f64::NAN); + lower_prev[band_start - 2] = LogProb(f64::NAN); } upper_curr[band_start] = LogProb::ln_zero(); @@ -317,7 +312,6 @@ pub fn viterbi_max_scoring_alignment( upper_prev[j] = upper_prev[j - 1] + params.transition_probs.deletion_from_deletion; } - for i in 1..(v.len() + 1) { let band_middle = (w.len() * i) / v.len(); let band_start = if band_middle >= band_width / 2 + 1 { @@ -336,12 +330,10 @@ pub fn viterbi_max_scoring_alignment( if i == 1 { lower_curr[0] = params.transition_probs.insertion_from_match } else { - lower_curr[0] = - lower_prev[0] + params.transition_probs.insertion_from_insertion; + lower_curr[0] = lower_prev[0] + params.transition_probs.insertion_from_insertion; } } - for j in band_start..(band_end + 1) { let lower_continue = lower_prev[j] + t.insertion_from_insertion; let lower_from_middle = middle_prev[j] + t.insertion_from_match; @@ -377,7 +369,7 @@ pub fn viterbi_max_scoring_alignment( middle_curr[j] = match_emission + max_option; } - for j in (band_start-1)..(band_end + 1) { + for j in (band_start - 1)..(band_end + 1) { upper_prev[j] = upper_curr[j]; middle_prev[j] = middle_curr[j]; lower_prev[j] = lower_curr[j]; @@ -385,9 +377,9 @@ pub fn viterbi_max_scoring_alignment( // we previously had a bug at the left boundary of the band... set these to NaN to make sure they // aren't used again if band_start >= 2 { - upper_prev[band_start-2] = LogProb(f64::NAN); - middle_prev[band_start-2] = LogProb(f64::NAN); - lower_prev[band_start-2] = LogProb(f64::NAN); + upper_prev[band_start - 2] = LogProb(f64::NAN); + middle_prev[band_start - 2] = LogProb(f64::NAN); + lower_prev[band_start - 2] = LogProb(f64::NAN); } upper_curr[band_start] = LogProb::ln_zero(); diff --git a/src/variants_and_fragments.rs b/src/variants_and_fragments.rs index fe3704d..7171d2e 100644 --- a/src/variants_and_fragments.rs +++ b/src/variants_and_fragments.rs @@ -16,11 +16,11 @@ use util::*; #[derive(Clone, Copy)] pub struct FragCall { - pub frag_ix: usize, // index into fragment list + pub frag_ix: usize, // index into fragment list pub var_ix: usize, // index into variant list pub allele: u8, // allele call pub qual: LogProb, // LogProb probability the call is an error - pub one_minus_qual: LogProb, // LogProb 1-probability the call is an error + pub one_minus_qual: LogProb, // LogProb 1-probability the call is an error } #[derive(Clone)] @@ -28,7 +28,7 @@ pub struct Fragment { pub id: Option, pub calls: Vec, pub p_read_hap: [LogProb; 2], - pub reverse_strand: bool + pub reverse_strand: bool, } #[repr(u8)] @@ -110,7 +110,7 @@ pub struct Var { // e.g. genotype_posteriors[2][0] is the log posterior probability of 2|0 haplotype pub phase_set: Option, pub strand_bias_pvalue: f64, // fisher's exact test strand bias Pvalue - pub mec: usize, // mec for variant + pub mec: usize, // mec for variant pub mec_frac_variant: f64, // mec fraction for this variant pub mec_frac_block: f64, // mec fraction for this haplotype block pub mean_allele_qual: f64, @@ -244,7 +244,7 @@ pub fn parse_vcf_potential_variants( mq20_frac: 0.0, mq30_frac: 0.0, mq40_frac: 0.0, - mq50_frac: 0.0 + mq50_frac: 0.0, }; varlist.push(new_var); }