Merge pull request #22 from jinlow/conributions-bug-fix
Contributions bug fix
jinlow authored Apr 23, 2023
2 parents 5b53399 + 58eec18 commit 9155f60
Showing 8 changed files with 111 additions and 22 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "forust-ml"
-version = "0.2.1"
+version = "0.2.2"
 edition = "2021"
 authors = ["James Inlow <[email protected]>"]
 homepage = "https://github.com/jinlow/forust"
2 changes: 1 addition & 1 deletion README.md
@@ -26,7 +26,7 @@ pip install forust
 
 To use in a rust project add the following to your Cargo.toml file.
 ```toml
-forust-ml = "0.2.1"
+forust-ml = "0.2.2"
 ```
 
 ## Usage
4 changes: 2 additions & 2 deletions py-forust/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "py-forust"
-version = "0.2.1"
+version = "0.2.2"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -10,6 +10,6 @@ crate-type = ["cdylib"]
 
 [dependencies]
 pyo3 = { version = "0.17", features = ["extension-module"] }
-forust-ml = { version="0.2.1", path="../" }
+forust-ml = { version="0.2.2", path="../" }
 numpy = "0.17.2"
 ndarray = "0.15.1"
4 changes: 2 additions & 2 deletions py-forust/tests/test_booster.py
@@ -278,7 +278,7 @@ def test_booster_to_xgboosts_with_contributions(X_y):
         objective_type="LogLoss",
         nbins=500,
         parallel=False,
-        base_score=0.0,
+        base_score=0.5,
     )
     fmod.fit(X, y=y)
     fmod_preds = fmod.predict(X)
@@ -299,7 +299,7 @@ def test_booster_to_xgboosts_with_contributions(X_y):
         eval_metric="auc",
         tree_method="hist",
         max_bin=10000,
-        base_score=0.0,
+        base_score=0.5,
     )
     xmod.fit(X, y)
     xmod_preds = xmod.predict(X, output_margin=True)
2 changes: 1 addition & 1 deletion rs-example.md
@@ -3,7 +3,7 @@
 To run this example, add the following code to your `Cargo.toml` file.
 ```toml
 [dependencies]
-forust-ml = "0.2.1"
+forust-ml = "0.2.2"
 polars = "0.24"
 reqwest = { version = "0.11", features = ["blocking"] }
 ```
28 changes: 27 additions & 1 deletion src/data.rs
@@ -119,6 +119,14 @@ impl<'a, T> Matrix<'a, T> {
         idx
     }
 
+    /// Get access to a row of the data, as an iterator.
+    pub fn get_row_iter(
+        &self,
+        row: usize,
+    ) -> std::iter::StepBy<std::iter::Skip<std::slice::Iter<T>>> {
+        self.data.iter().skip(row).step_by(self.rows)
+    }
+
     /// Get a slice of a column in the matrix.
     ///
     /// * `col` - The index of the column to select.
@@ -138,6 +146,16 @@ impl<'a, T> Matrix<'a, T> {
     }
 }
 
+impl<'a, T> Matrix<'a, T>
+where
+    T: Copy,
+{
+    /// Get a row of the data as a vector.
+    pub fn get_row(&self, row: usize) -> Vec<T> {
+        self.get_row_iter(row).copied().collect()
+    }
+}
+
 /// A lightweight row major matrix, this is primarily
 /// for returning data to the user, it is especially
 /// suited for appending rows to, such as when building
@@ -227,7 +245,6 @@ impl<T> JaggedMatrix<T> {
             n_records: 0,
         }
     }
-
     /// Get the column of a jagged array.
     pub fn get_col(&self, col: usize) -> &[T] {
         assert!(col < self.ends.len());
@@ -285,6 +302,15 @@ mod tests {
         assert_eq!(m.get_col(1), &vec![5, 6, 7]);
    }
 
+    #[test]
+    fn test_matrix_row() {
+        let v = vec![1, 2, 3, 5, 6, 7];
+        let m = Matrix::new(&v, 3, 2);
+        assert_eq!(m.get_row(2), vec![3, 7]);
+        assert_eq!(m.get_row(0), vec![1, 5]);
+        assert_eq!(m.get_row(1), vec![2, 6]);
+    }
+
     #[test]
     fn test_jaggedmatrix_get_col() {
         let vecs = vec![vec![0], vec![5, 4, 3, 2], vec![4, 5]];
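Note on the new row accessors: `Matrix` stores its data column-major, so element `(row, col)` lives at index `col * rows + row`, and `skip(row).step_by(rows)` visits exactly the elements of one row. Below is a minimal standalone sketch of that indexing; `ColMatrix` is a hypothetical stand-in, not the crate's `Matrix` type.

```rust
// Sketch of the column-major row walk behind `get_row_iter`/`get_row`.
struct ColMatrix<'a> {
    data: &'a [i32], // columns stored back to back: col0, col1, ...
    rows: usize,
}

impl<'a> ColMatrix<'a> {
    // Element (row, col) sits at col * rows + row, so one row is
    // every `rows`-th element starting at offset `row`.
    fn get_row(&self, row: usize) -> Vec<i32> {
        self.data.iter().skip(row).step_by(self.rows).copied().collect()
    }
}

fn main() {
    // 3 rows x 2 cols: col0 = [1, 2, 3], col1 = [5, 6, 7].
    let v = vec![1, 2, 3, 5, 6, 7];
    let m = ColMatrix { data: &v, rows: 3 };
    assert_eq!(m.get_row(0), vec![1, 5]);
    assert_eq!(m.get_row(1), vec![2, 6]);
    assert_eq!(m.get_row(2), vec![3, 7]);
}
```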
62 changes: 59 additions & 3 deletions src/gradientbooster.rs
@@ -216,6 +216,30 @@ impl GradientBooster {
         init_preds
     }
 
+    // pub fn predict(&self, data: &Matrix<f64>, parallel: bool) -> Vec<f64> {
+    //     // After we discovered it's faster materializing the row once, and then
+    //     // passing that to each tree, let's see if we can do the same with the booster
+    //     // prediction...
+    //     // Clean this up..
+    //     let mut init_preds = vec![self.base_score; data.rows];
+    //     if parallel {
+    //         init_preds.par_iter_mut().enumerate().for_each(|(i, p)| {
+    //             let pred_row = data.get_row(i);
+    //             for t in &self.trees {
+    //                 *p += t.predict_row_from_row_slice(&pred_row);
+    //             }
+    //         });
+    //     } else {
+    //         init_preds.iter_mut().enumerate().for_each(|(i, p)| {
+    //             let pred_row = data.get_row(i);
+    //             for t in &self.trees {
+    //                 *p += t.predict_row_from_row_slice(&pred_row);
+    //             }
+    //         });
+    //     }
+    //     init_preds
+    // }
+
     /// Generate predictions on data using the gradient booster.
     ///
     /// * `data` - Either a pandas DataFrame, or a 2 dimensional numpy array.
@@ -232,9 +256,41 @@ impl GradientBooster {
                 .collect()
         };
         let mut contribs = vec![0.; (data.cols + 1) * data.rows];
-        self.trees.iter().zip(weights.iter()).for_each(|(t, w)| {
-            t.predict_contributions(data, &mut contribs, w, parallel);
-        });
+
+        // Add the bias term to every bias value...
+        let bias_idx = data.cols + 1;
+        contribs
+            .iter_mut()
+            .skip(bias_idx - 1)
+            .step_by(bias_idx)
+            .for_each(|v| *v += self.base_score);
+
+        // Clean this up..
+        // Materializing a row, and then passing that to all of the
+        // trees, seems to be the fastest approach (5X faster); we should test
+        // something like this for normal predictions.
+        if parallel {
+            data.index
+                .par_iter()
+                .zip(contribs.par_chunks_mut(data.cols + 1))
+                .for_each(|(row, c)| {
+                    let r_ = data.get_row(*row);
+                    self.trees.iter().zip(weights.iter()).for_each(|(t, w)| {
+                        t.predict_contributions_row(&r_, c, w);
+                    });
+                });
+        } else {
+            data.index
+                .iter()
+                .zip(contribs.chunks_mut(data.cols + 1))
+                .for_each(|(row, c)| {
+                    let r_ = data.get_row(*row);
+                    self.trees.iter().zip(weights.iter()).for_each(|(t, w)| {
+                        t.predict_contributions_row(&r_, c, w);
+                    });
+                });
+        }
+
         contribs
     }
 
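For context on the bias indexing above: `contribs` is laid out row-major with `data.cols + 1` slots per row, and the final slot of each row holds the bias, so skipping `bias_idx - 1` elements and then stepping by `bias_idx` lands on exactly those slots. A self-contained sketch of that pattern under assumed sizes (the names here are illustrative, not the booster's API):

```rust
fn main() {
    // 3 rows, 4 features; each row owns `cols + 1` slots, bias last.
    let (rows, cols) = (3, 4);
    let stride = cols + 1;
    let mut contribs = vec![0.0_f64; stride * rows];
    let base_score = 0.5;

    // Same pattern as the booster: skip to the first bias slot,
    // then jump one full row-stride at a time.
    contribs
        .iter_mut()
        .skip(stride - 1)
        .step_by(stride)
        .for_each(|v| *v += base_score);

    // Only the final slot of each row was touched.
    for row in contribs.chunks(stride) {
        assert_eq!(row[stride - 1], base_score);
        assert!(row[..stride - 1].iter().all(|v| *v == 0.0));
    }
}
```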
29 changes: 18 additions & 11 deletions src/tree.rs
@@ -118,23 +118,17 @@ impl Tree {
             }
         }
     }
-    pub fn predict_contributions_row(
-        &self,
-        data: &Matrix<f64>,
-        row: usize,
-        contribs: &mut [f64],
-        weights: &[f64],
-    ) {
+    pub fn predict_contributions_row(&self, row: &[f64], contribs: &mut [f64], weights: &[f64]) {
         // Add the bias term first...
-        contribs[data.cols] += weights[0];
+        contribs[contribs.len() - 1] += weights[0];
         let mut node_idx = 0;
         loop {
             let node = &self.nodes[node_idx];
             if node.is_leaf {
                 break;
             }
             // Get change of weight given child's weight.
-            let child_idx = node.get_child_idx(data.get(row, node.split_feature));
+            let child_idx = node.get_child_idx(&row[node.split_feature]);
             let node_weight = weights[node_idx];
             let child_weight = weights[child_idx];
             let delta = child_weight - node_weight;
@@ -154,9 +148,10 @@ impl Tree {
             .iter()
             .zip(contribs.chunks_mut(data.cols + 1))
             .for_each(|(row, contribs)| {
-                self.predict_contributions_row(data, *row, contribs, weights)
+                self.predict_contributions_row(&data.get_row(*row), contribs, weights)
             })
     }
+
     fn predict_contributions_parallel(
         &self,
         data: &Matrix<f64>,
@@ -168,7 +163,7 @@ impl Tree {
             .par_iter()
             .zip(contribs.par_chunks_mut(data.cols + 1))
             .for_each(|(row, contribs)| {
-                self.predict_contributions_row(data, *row, contribs, weights)
+                self.predict_contributions_row(&data.get_row(*row), contribs, weights)
             })
     }
 
@@ -198,6 +193,18 @@ impl Tree {
         }
     }
 
+    pub fn predict_row_from_row_slice(&self, row: &[f64]) -> f64 {
+        let mut node_idx = 0;
+        loop {
+            let node = &self.nodes[node_idx];
+            if node.is_leaf {
+                return node.weight_value as f64;
+            } else {
+                node_idx = node.get_child_idx(&row[node.split_feature]);
+            }
+        }
+    }
+
     fn predict_single_threaded(&self, data: &Matrix<f64>) -> Vec<f64> {
         data.index
             .iter()
