Merge pull request #85 from jinlow/refactor/clean-up-evaluation

Refactor/clean up evaluation

jinlow authored Oct 17, 2023
2 parents 9fd0347 + 520ab79 commit 8e91a76
Showing 13 changed files with 78 additions and 83 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "forust-ml"
version = "0.4.0"
version = "0.4.1"
edition = "2021"
authors = ["James Inlow <[email protected]>"]
homepage = "https://github.com/jinlow/forust"
2 changes: 1 addition & 1 deletion README.md
@@ -29,7 +29,7 @@ pip install forust

To use in a rust project add the following to your Cargo.toml file.
```toml
forust-ml = "0.4.0"
forust-ml = "0.4.1"
```

## Usage
11 changes: 3 additions & 8 deletions benches/forust_benchmarks.rs
@@ -20,20 +20,15 @@ pub fn tree_benchmarks(c: &mut Criterion) {
let y: Vec<f64> = file.lines().map(|x| x.parse::<f64>().unwrap()).collect();
let yhat = vec![0.5; y.len()];
let w = vec![1.; y.len()];
let g = LogLoss::calc_grad(&y, &yhat, &w);
let h = LogLoss::calc_hess(&y, &yhat, &w);
let (g, h) = LogLoss::calc_grad_hess(&y, &yhat, &w);

let v: Vec<f32> = vec![10.; 300000];
c.bench_function("Niave Sum", |b| b.iter(|| naive_sum(black_box(&v))));
c.bench_function("fast sum", |b| b.iter(|| fast_sum(black_box(&v))));
c.bench_function("fast f64 sum", |b| b.iter(|| fast_f64_sum(black_box(&v))));

c.bench_function("calc_grad", |b| {
b.iter(|| LogLoss::calc_grad(black_box(&y), black_box(&yhat), black_box(&w)))
});

c.bench_function("calc_hess", |b| {
b.iter(|| LogLoss::calc_hess(black_box(&y), black_box(&yhat), black_box(&w)))
c.bench_function("calc_grad_hess", |b| {
b.iter(|| LogLoss::calc_grad_hess(black_box(&y), black_box(&yhat), black_box(&w)))
});

let data = Matrix::new(&data_vec, y.len(), 5);
4 changes: 2 additions & 2 deletions py-forust/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "py-forust"
version = "0.4.0"
version = "0.4.1"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -10,7 +10,7 @@ crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.19.0", features = ["extension-module"] }
forust-ml = { version = "0.4.0", path = "../" }
forust-ml = { version = "0.4.1", path = "../" }
numpy = "0.19.0"
ndarray = "0.15.1"
serde_plain = { version = "1.0" }
10 changes: 10 additions & 0 deletions py-forust/forust/__init__.py
@@ -68,6 +68,7 @@ class BoosterType(Protocol):
best_iteration: None | int
base_score: float
terminate_missing_features: set[int]
number_of_trees: int

def fit(
self,
@@ -944,6 +945,15 @@ def prediction_iteration(self) -> int | None:
"""
return self.booster.prediction_iteration

@property
def number_of_trees(self) -> int:
"""The number of trees in the model.
Returns:
int: The total number of trees in the model.
"""
return self.booster.number_of_trees

def get_best_iteration(self) -> int | None:
"""Get the best iteration if `early_stopping_rounds` was used when fitting.
5 changes: 5 additions & 0 deletions py-forust/src/lib.rs
@@ -193,6 +193,11 @@ impl GradientBooster {
Ok(self.booster.base_score)
}

#[getter]
fn number_of_trees(&self) -> PyResult<usize> {
Ok(self.booster.trees.len())
}

pub fn fit(
&mut self,
flat_data: PyReadonlyArray1<f64>,
1 change: 1 addition & 0 deletions py-forust/tests/test_booster.py
@@ -950,6 +950,7 @@ def test_early_stopping_with_dev_val(X_y):
assert len(n_trees) == model.get_best_iteration() + 5
assert len(n_trees) == model.get_evaluation_history().shape[0]
assert model.get_best_iteration() < 99
assert model.number_of_trees == model.get_best_iteration() + 5


def test_goss_sampling_method(X_y):
2 changes: 1 addition & 1 deletion rs-example.md
@@ -3,7 +3,7 @@
To run this example, add the following code to your `Cargo.toml` file.
```toml
[dependencies]
forust-ml = "0.4.0"
forust-ml = "0.4.1"
polars = "0.28"
reqwest = { version = "0.11", features = ["blocking"] }
```
3 changes: 1 addition & 2 deletions src/histogram.rs
@@ -250,8 +250,7 @@ mod tests {
let y: Vec<f64> = file.lines().map(|x| x.parse::<f64>().unwrap()).collect();
let yhat = vec![0.5; y.len()];
let w = vec![1.; y.len()];
let g = LogLoss::calc_grad(&y, &yhat, &w);
let h = LogLoss::calc_hess(&y, &yhat, &w);
let (g, h) = LogLoss::calc_grad_hess(&y, &yhat, &w);
let hist =
create_feature_histogram(&bdata.get_col(1), &b.cuts.get_col(1), &g, &h, &bdata.index);
// println!("{:?}", hist);
76 changes: 37 additions & 39 deletions src/objective.rs
@@ -26,8 +26,6 @@ pub fn calc_init_callables(objective_type: &ObjectiveType) -> fn(&[f64], &[f64])
pub trait ObjectiveFunction {
fn calc_loss(y: &[f64], yhat: &[f64], sample_weight: &[f64]) -> Vec<f32>;
fn calc_grad_hess(y: &[f64], yhat: &[f64], sample_weight: &[f64]) -> (Vec<f32>, Vec<f32>);
fn calc_grad(y: &[f64], yhat: &[f64], sample_weight: &[f64]) -> Vec<f32>;
fn calc_hess(y: &[f64], yhat: &[f64], sample_weight: &[f64]) -> Vec<f32>;
fn calc_init(y: &[f64], sample_weight: &[f64]) -> f64;
fn default_metric() -> Metric;
}
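
With `calc_grad` and `calc_hess` dropped from the trait, an objective now only supplies the fused `calc_grad_hess`, which evaluates the sigmoid once per sample and emits the gradient and hessian in a single pass. Below is a minimal sketch of that computation for log loss, reconstructed from the removed per-function bodies shown (commented out) further down in this diff; the free-standing function name and the plain `1.0` constant are simplifications, so treat it as an illustration rather than the crate's exact code.

```rust
// Sketch only: one pass producing (gradient, hessian) vectors for log loss.
fn log_loss_grad_hess(y: &[f64], yhat: &[f64], sample_weight: &[f64]) -> (Vec<f32>, Vec<f32>) {
    y.iter()
        .zip(yhat)
        .zip(sample_weight)
        .map(|((y_, yhat_), w_)| {
            // Sigmoid of the raw prediction, computed once and reused for both values.
            let p = 1.0 / (1.0 + (-*yhat_).exp());
            (
                ((p - *y_) * *w_) as f32,     // gradient
                (p * (1.0 - p) * *w_) as f32, // hessian
            )
        })
        .unzip()
}

fn main() {
    let y = vec![0.0, 1.0, 1.0];
    let yhat = vec![0.5; y.len()];
    let w = vec![1.0; y.len()];
    let (g, h) = log_loss_grad_hess(&y, &yhat, &w);
    println!("grad: {g:?}\nhess: {h:?}");
}
```

Fusing the two calls avoids recomputing the sigmoid for every sample, which is also why the benchmark file above now measures a single `calc_grad_hess` function instead of separate `calc_grad` and `calc_hess` runs.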
@@ -73,27 +71,27 @@ impl ObjectiveFunction for LogLoss {
.unzip()
}

#[inline]
fn calc_grad(y: &[f64], yhat: &[f64], sample_weight: &[f64]) -> Vec<f32> {
y.iter()
.zip(yhat)
.zip(sample_weight)
.map(|((y_, yhat_), w_)| {
let yhat_ = f64::ONE / (f64::ONE + (-*yhat_).exp());
((yhat_ - *y_) * *w_) as f32
})
.collect()
}
#[inline]
fn calc_hess(_: &[f64], yhat: &[f64], sample_weight: &[f64]) -> Vec<f32> {
yhat.iter()
.zip(sample_weight)
.map(|(yhat_, w_)| {
let yhat_ = f64::ONE / (f64::ONE + (-*yhat_).exp());
(yhat_ * (f64::ONE - yhat_) * *w_) as f32
})
.collect()
}
// #[inline]
// fn calc_grad(y: &[f64], yhat: &[f64], sample_weight: &[f64]) -> Vec<f32> {
// y.iter()
// .zip(yhat)
// .zip(sample_weight)
// .map(|((y_, yhat_), w_)| {
// let yhat_ = f64::ONE / (f64::ONE + (-*yhat_).exp());
// ((yhat_ - *y_) * *w_) as f32
// })
// .collect()
// }
// #[inline]
// fn calc_hess(_: &[f64], yhat: &[f64], sample_weight: &[f64]) -> Vec<f32> {
// yhat.iter()
// .zip(sample_weight)
// .map(|(yhat_, w_)| {
// let yhat_ = f64::ONE / (f64::ONE + (-*yhat_).exp());
// (yhat_ * (f64::ONE - yhat_) * *w_) as f32
// })
// .collect()
// }
fn default_metric() -> Metric {
Metric::LogLoss
}
@@ -126,19 +124,19 @@ impl ObjectiveFunction for SquaredLoss {
ytot / ntot
}

#[inline]
fn calc_grad(y: &[f64], yhat: &[f64], sample_weight: &[f64]) -> Vec<f32> {
y.iter()
.zip(yhat)
.zip(sample_weight)
.map(|((y_, yhat_), w_)| ((*yhat_ - *y_) * *w_) as f32)
.collect()
}
// #[inline]
// fn calc_grad(y: &[f64], yhat: &[f64], sample_weight: &[f64]) -> Vec<f32> {
// y.iter()
// .zip(yhat)
// .zip(sample_weight)
// .map(|((y_, yhat_), w_)| ((*yhat_ - *y_) * *w_) as f32)
// .collect()
// }

#[inline]
fn calc_hess(_: &[f64], _: &[f64], sample_weight: &[f64]) -> Vec<f32> {
sample_weight.iter().map(|v| *v as f32).collect()
}
// #[inline]
// fn calc_hess(_: &[f64], _: &[f64], sample_weight: &[f64]) -> Vec<f32> {
// sample_weight.iter().map(|v| *v as f32).collect()
// }
#[inline]
fn calc_grad_hess(y: &[f64], yhat: &[f64], sample_weight: &[f64]) -> (Vec<f32>, Vec<f32>) {
y.iter()
@@ -171,9 +169,9 @@ mod tests {
let y = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
let yhat1 = vec![-1.0, -1.0, -1.0, 1.0, 1.0, 1.0];
let w = vec![1.; y.len()];
let g1 = LogLoss::calc_grad(&y, &yhat1, &w);
let (g1, _) = LogLoss::calc_grad_hess(&y, &yhat1, &w);
let yhat2 = vec![0.0, 0.0, -1.0, 1.0, 0.0, 1.0];
let g2 = LogLoss::calc_grad(&y, &yhat2, &w);
let (g2, _) = LogLoss::calc_grad_hess(&y, &yhat2, &w);
assert!(g1.iter().sum::<f32>() < g2.iter().sum::<f32>());
}

@@ -182,9 +180,9 @@
let y = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
let yhat1 = vec![-1.0, -1.0, -1.0, 1.0, 1.0, 1.0];
let w = vec![1.; y.len()];
let h1 = LogLoss::calc_hess(&y, &yhat1, &w);
let (_, h1) = LogLoss::calc_grad_hess(&y, &yhat1, &w);
let yhat2 = vec![0.0, 0.0, -1.0, 1.0, 0.0, 1.0];
let h2 = LogLoss::calc_hess(&y, &yhat2, &w);
let (_, h2) = LogLoss::calc_grad_hess(&y, &yhat2, &w);
assert!(h1.iter().sum::<f32>() < h2.iter().sum::<f32>());
}

3 changes: 1 addition & 2 deletions src/partial_dependence.rs
@@ -93,8 +93,7 @@ mod tests {
let y: Vec<f64> = file.lines().map(|x| x.parse::<f64>().unwrap()).collect();
let yhat = vec![0.5; y.len()];
let w = vec![1.; y.len()];
let g = LogLoss::calc_grad(&y, &yhat, &w);
let h = LogLoss::calc_hess(&y, &yhat, &w);
let (g, h) = LogLoss::calc_grad_hess(&y, &yhat, &w);

let data = Matrix::new(&data_vec, 891, 5);
let splitter = MissingImputerSplitter {
29 changes: 10 additions & 19 deletions src/splitter.rs
@@ -119,6 +119,8 @@ pub trait Splitter {
let left_hessian = cuml_hess;
let right_gradient = node.gradient_sum - cuml_grad - missing.gradient_sum;
let right_hessian = node.hessian_sum - cuml_hess - missing.hessian_sum;
cuml_grad += bin.gradient_sum;
cuml_hess += bin.hessian_sum;

let (mut left_node_info, mut right_node_info, missing_info) = match self.evaluate_split(
left_gradient,
@@ -133,8 +135,6 @@
constraint,
) {
None => {
cuml_grad += bin.gradient_sum;
cuml_hess += bin.hessian_sum;
continue;
}
Some(v) => v,
@@ -156,9 +156,6 @@
);

if split_gain <= 0.0 {
// Update for new value
cuml_grad += bin.gradient_sum;
cuml_hess += bin.hessian_sum;
continue;
}

@@ -189,9 +186,6 @@
missing_node: missing_info,
});
}
// Update for new value
cuml_grad += bin.gradient_sum;
cuml_hess += bin.hessian_sum;
}
split_info
}
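
Hoisting the two cumulative updates to a single spot right after the left/right totals are read (the first hunk above) is what lets the later hunks drop the repeated `cuml_grad`/`cuml_hess` bookkeeping at every `continue`. A stripped-down sketch of the resulting loop shape, with made-up types, a toy gain formula, and missing-value handling omitted:

```rust
// Simplified sketch of the split-scan loop after this refactor; `Bin`, `Node`,
// and the gain formula are stand-ins, not the crate's real types.
struct Bin {
    gradient_sum: f32,
    hessian_sum: f32,
}

struct Node {
    gradient_sum: f32,
    hessian_sum: f32,
}

fn best_split(node: &Node, bins: &[Bin]) -> Option<(usize, f32)> {
    let mut cuml_grad = 0.0_f32;
    let mut cuml_hess = 0.0_f32;
    let mut best: Option<(usize, f32)> = None;

    for (i, bin) in bins.iter().enumerate() {
        // Totals for "everything before this bin goes left, the rest goes right".
        let left_gradient = cuml_grad;
        let left_hessian = cuml_hess;
        let right_gradient = node.gradient_sum - cuml_grad;
        let right_hessian = node.hessian_sum - cuml_hess;

        // Accumulate exactly once, up front, so every `continue` below can
        // bail out without repeating this bookkeeping.
        cuml_grad += bin.gradient_sum;
        cuml_hess += bin.hessian_sum;

        if left_hessian <= 0.0 || right_hessian <= 0.0 {
            continue; // split not evaluable at this bin
        }

        // Toy gain: children's squared-gradient-over-hessian score vs the parent's.
        let gain = left_gradient.powi(2) / left_hessian
            + right_gradient.powi(2) / right_hessian
            - node.gradient_sum.powi(2) / node.hessian_sum;
        if gain <= 0.0 {
            continue;
        }
        if best.map_or(true, |(_, g)| gain > g) {
            best = Some((i, gain));
        }
    }
    best
}

fn main() {
    let node = Node { gradient_sum: 1.5, hessian_sum: 4.0 };
    let bins = vec![
        Bin { gradient_sum: 0.9, hessian_sum: 1.0 },
        Bin { gradient_sum: 0.4, hessian_sum: 1.5 },
        Bin { gradient_sum: 0.2, hessian_sum: 1.5 },
    ];
    println!("best split: {:?}", best_split(&node, &bins));
}
```

The point of the change is that every early-exit path now sees the accumulators already advanced, so no branch can forget the update.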
@@ -799,9 +793,9 @@ impl Splitter for MissingImputerSplitter {
// Don't even worry about it, if there are no missing values
// in this bin.
if (missing_gradient != 0.0) || (missing_hessian != 0.0) {
// If
// TODO: Consider making this safer, by casting to f64, summing, and then
// back to f32...

// The weight if missing went left
let missing_left_weight = constrained_weight(
&self.l2,
@@ -843,10 +837,10 @@
missing_right_weight,
);
// Confirm this wouldn't break monotonicity.
let missing_left_gain = cull_gain(
missing_left_gain,
missing_left_weight,
right_weight,
let missing_right_gain = cull_gain(
missing_right_gain,
left_weight,
missing_right_weight,
constraint,
);
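
In the hunk above, the monotonicity cull that was previously applied to the left-missing gain a second time is now applied to the right-missing gain with the matching child-weight pair. Below is a simplified illustration of that pairing; `cull_gain` and `Constraint` here are hypothetical stand-ins for the crate's real definitions, included only to show which weights go with which gain.

```rust
// Hypothetical stand-ins: forust-ml's real `cull_gain` / `Constraint` are
// defined elsewhere in the crate and may well look different.
#[derive(Clone, Copy)]
#[allow(dead_code)]
enum Constraint {
    Positive,
    Negative,
}

// Drop a candidate gain when the left/right child weights would violate the
// requested monotone constraint.
fn cull_gain(gain: f32, left_weight: f32, right_weight: f32, constraint: Option<Constraint>) -> f32 {
    match constraint {
        Some(Constraint::Positive) if left_weight > right_weight => f32::NEG_INFINITY,
        Some(Constraint::Negative) if left_weight < right_weight => f32::NEG_INFINITY,
        _ => gain,
    }
}

fn main() {
    let (left_weight, right_weight) = (0.20_f32, 0.50_f32);
    let (missing_left_weight, missing_right_weight) = (0.25_f32, 0.45_f32);
    let constraint = Some(Constraint::Positive);

    // Missing values sent left: the children are (missing-augmented left, right).
    let missing_left_gain = cull_gain(1.3, missing_left_weight, right_weight, constraint);
    // Missing values sent right: the children are (left, missing-augmented right).
    // This is the pairing the commit's change introduces for the second call.
    let missing_right_gain = cull_gain(1.1, left_weight, missing_right_weight, constraint);

    println!("culled gains: {missing_left_gain} {missing_right_gain}");
}
```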

@@ -1005,8 +999,7 @@ mod tests {
let y = vec![0., 0., 0., 1., 1., 0., 1.];
let yhat = vec![0.; 7];
let w = vec![1.; y.len()];
let grad = LogLoss::calc_grad(&y, &yhat, &w);
let hess = LogLoss::calc_hess(&y, &yhat, &w);
let (grad, hess) = LogLoss::calc_grad_hess(&y, &yhat, &w);
let b = bin_matrix(&data, &w, 10, f64::NAN).unwrap();
let bdata = Matrix::new(&b.binned_data, data.rows, data.cols);
let index = data.index.to_owned();
@@ -1049,8 +1042,7 @@
let y = vec![0., 0., 0., 1., 1., 0., 1.];
let yhat = vec![0.; 7];
let w = vec![1.; y.len()];
let grad = LogLoss::calc_grad(&y, &yhat, &w);
let hess = LogLoss::calc_hess(&y, &yhat, &w);
let (grad, hess) = LogLoss::calc_grad_hess(&y, &yhat, &w);

let b = bin_matrix(&data, &w, 10, f64::NAN).unwrap();
let bdata = Matrix::new(&b.binned_data, data.rows, data.cols);
@@ -1100,8 +1092,7 @@
let y: Vec<f64> = file.lines().map(|x| x.parse::<f64>().unwrap()).collect();
let yhat = vec![0.5; y.len()];
let w = vec![1.; y.len()];
let grad = LogLoss::calc_grad(&y, &yhat, &w);
let hess = LogLoss::calc_hess(&y, &yhat, &w);
let (grad, hess) = LogLoss::calc_grad_hess(&y, &yhat, &w);

let splitter = MissingImputerSplitter {
l2: 1.0,
13 changes: 5 additions & 8 deletions src/tree.rs
@@ -571,8 +571,8 @@ mod tests {
let y: Vec<f64> = file.lines().map(|x| x.parse::<f64>().unwrap()).collect();
let yhat = vec![0.5; y.len()];
let w = vec![1.; y.len()];
let mut g = LogLoss::calc_grad(&y, &yhat, &w);
let mut h = LogLoss::calc_hess(&y, &yhat, &w);
let (mut g, mut h) = LogLoss::calc_grad_hess(&y, &yhat, &w);
// let mut h = LogLoss::calc_hess(&y, &yhat, &w);

let data = Matrix::new(&data_vec, 891, 5);
let splitter = MissingImputerSplitter {
@@ -616,8 +616,7 @@
let y: Vec<f64> = file.lines().map(|x| x.parse::<f64>().unwrap()).collect();
let yhat = vec![0.5; y.len()];
let w = vec![1.; y.len()];
let g = LogLoss::calc_grad(&y, &yhat, &w);
let h = LogLoss::calc_hess(&y, &yhat, &w);
let (g, h) = LogLoss::calc_grad_hess(&y, &yhat, &w);

let data = Matrix::new(&data_vec, 891, 5);
let splitter = MissingImputerSplitter {
@@ -698,8 +697,7 @@ mod tests {
let y: Vec<f64> = file.lines().map(|x| x.parse::<f64>().unwrap()).collect();
let yhat = vec![0.5; y.len()];
let w = vec![1.; y.len()];
let g = LogLoss::calc_grad(&y, &yhat, &w);
let h = LogLoss::calc_hess(&y, &yhat, &w);
let (g, h) = LogLoss::calc_grad_hess(&y, &yhat, &w);
println!("GRADIENT -- {:?}", h);

let data_ = Matrix::new(&data_vec, 891, 5);
@@ -783,8 +781,7 @@
let y: Vec<f64> = file.lines().map(|x| x.parse::<f64>().unwrap()).collect();
let yhat = vec![0.5; y.len()];
let w = vec![1.; y.len()];
let g = LogLoss::calc_grad(&y, &yhat, &w);
let h = LogLoss::calc_hess(&y, &yhat, &w);
let (g, h) = LogLoss::calc_grad_hess(&y, &yhat, &w);

let data = Matrix::new(&data_vec, 891, 5);
let splitter = MissingImputerSplitter {
Expand Down

