Skip to content

Commit

Permalink
Sampling adjustment (#84)
Browse files Browse the repository at this point in the history
* Entropy Debiasing Adjustment

* fixed deps (#85) (#86)

* fixed deps

* no more lower versions of Python

* no more lower versions of Python

* trying stable rust

* fixing old deps

* fixing old deps

* fixing imports

* clang

* fixing xlsx

* fixing more xlsx crate issues

* fixing more xlsx crate issues

* fixing more xlsx crate issues

* removing xlsx support

* removing xlsx support

* ready to release

* updated lock

* trying to support 3.8

* trying to support more python versions

* updated lock

* no 3.12 because of maturin issues

* updated lock

---------

Co-authored-by: Shan Tharanga <[email protected]>

---------

Co-authored-by: Eyyub Unlu <[email protected]>
Co-authored-by: Shan Tharanga <[email protected]>
  • Loading branch information
3 people authored Sep 3, 2024
1 parent 3cc713b commit eb96f38
Showing 1 changed file with 19 additions and 7 deletions.
26 changes: 19 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -594,8 +594,16 @@ fn calculate_entropy(kmers: &[Box<str>], support_threshold: &usize) -> f64 {
// and we do not need to skew the regression with unreliable data
// We stop at 99% because at 100% we do not do resampling and all kmers are used which we
// calculated above
let mut entropy_values: Vec<(f64, f64)> = (percentage_cutoff..99)
.into_iter()
let starting_point = if percentage_cutoff < 50 {
50
} else {
// Cutoff is 50 or above: round it up to the next multiple of 5 (unchanged if already a multiple)
(percentage_cutoff + 4) / 5 * 5
};


let mut entropy_values: Vec<(f64, f64)> = (starting_point..100)
.step_by(5)
.map(|percentage| {
// Figure out the number of samples that this % represents
let samples = (percentage * kmer_count) / 100;
Expand All @@ -613,13 +621,17 @@ fn calculate_entropy(kmers: &[Box<str>], support_threshold: &usize) -> f64 {
// One of the data points has to be 100% of the k-mers WITHOUT random sampling
entropy_values.push((1.0 / kmer_count as f64, all_kmers_entropy));

let (_, y) = linear_regression_of(&entropy_values).unwrap();
if entropy_values.len() >= 5 {
let (_, y) = linear_regression_of(&entropy_values).unwrap();

if y < 0_f64 {
return all_kmers_entropy;
}
if y < 0_f64 {
return all_kmers_entropy;
}

y
} else {
return all_kmers_entropy;
}
}

fn get_kmers_and_headers(
Expand Down Expand Up @@ -882,4 +894,4 @@ fn dima(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<Variant>()?;
m.add_class::<Results>()?;
Ok(())
}
}

0 comments on commit eb96f38

Please sign in to comment.