Skip to content

Commit

Permalink
MAINT Update submodule commit and remove cnp imports where possible (#…
Browse files Browse the repository at this point in the history
…249)

* Maintenance to update submodule commit 

---------

Signed-off-by: Adam Li <[email protected]>
  • Loading branch information
adam2392 authored Apr 9, 2024
1 parent 9c02f0b commit 232747e
Show file tree
Hide file tree
Showing 18 changed files with 299 additions and 136 deletions.
2 changes: 1 addition & 1 deletion sktree/_lib/sklearn_fork
Submodule sklearn_fork updated 158 files
96 changes: 87 additions & 9 deletions sktree/tree/_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from scipy.sparse import issparse
from sklearn.base import ClusterMixin, TransformerMixin
from sklearn.cluster import AgglomerativeClustering
from sklearn.utils import check_random_state
from sklearn.utils._param_validation import Interval
from sklearn.utils.validation import check_is_fitted

Expand Down Expand Up @@ -918,6 +919,7 @@ def _build_tree(
Controls the randomness of the estimator.
"""
monotonic_cst = None
self.monotonic_cst_ = monotonic_cst
_, n_features = X.shape

if self.feature_combinations is None:
Expand Down Expand Up @@ -963,7 +965,7 @@ def _build_tree(

# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
self.builder_ = DepthFirstTreeBuilder(
builder = DepthFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -972,7 +974,7 @@ def _build_tree(
self.min_impurity_decrease,
)
else:
self.builder_ = BestFirstTreeBuilder(
builder = BestFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -982,7 +984,7 @@ def _build_tree(
self.min_impurity_decrease,
)

self.builder_.build(self.tree_, X, y, sample_weight, None)
builder.build(self.tree_, X, y, sample_weight, None)

if self.n_outputs_ == 1:
self.n_classes_ = self.n_classes_[0]
Expand All @@ -996,6 +998,78 @@ def _inheritable_fitted_attribute(self):
"feature_combinations_",
]

def _update_tree(self, X, y, sample_weight):
    """Grow the already-existing ``self.tree_`` further using ``(X, y)``.

    A fresh criterion, splitter and builder are assembled from the fitted
    ``*_`` attributes stored on the estimator (``min_samples_split_``,
    ``min_samples_leaf_``, ``min_weight_leaf_``, ``monotonic_cst_``,
    ``max_features_``, ``feature_combinations_``), since the builder itself
    is not kept on the instance and would not survive pickling.

    Parameters
    ----------
    X, y, sample_weight
        Forwarded unchanged to the builder's ``initialize_node_queue`` and
        ``build`` calls.

    Returns
    -------
    self
        The estimator, after the tree has been updated and pruned.

    Raises
    ------
    ValueError
        If ``X`` is a scipy sparse matrix.
    """
    # Translate the "no limit" settings into the values handed to the builder.
    if self.max_leaf_nodes is None:
        leaf_cap = -1
    else:
        leaf_cap = self.max_leaf_nodes
    split_floor = self.min_samples_split_
    leaf_floor = self.min_samples_leaf_
    weight_floor = self.min_weight_leaf_
    if self.max_depth is None:
        depth_cap = np.iinfo(np.int32).max
    else:
        depth_cap = self.max_depth

    cst = self.monotonic_cst_

    # A user-supplied criterion object is deep-copied so that any mutable
    # state it carries is not shared (e.g. across parallel fits); a criterion
    # given by name is instantiated from the classification registry.
    crit = self.criterion
    if isinstance(crit, BaseCriterion):
        crit = copy.deepcopy(crit)
    else:
        crit = CRITERIA_CLF[self.criterion](self.n_outputs_, self._n_classes_)

    rng = check_random_state(self.random_state)

    splitter = self.splitter
    if issparse(X):
        raise ValueError(
            "Sparse input is not supported for oblique trees. "
            "Please convert your data to a dense array."
        )
    registry = OBLIQUE_DENSE_SPLITTERS
    # Only build a splitter when one was not passed in ready-made.
    if not isinstance(splitter, ObliqueSplitter):
        splitter = registry[splitter](
            crit,
            self.max_features_,
            leaf_floor,
            weight_floor,
            rng,
            cst,
            self.feature_combinations_,
        )

    # Leading positional arguments common to both builder types.
    shared_head = (splitter, split_floor, leaf_floor, weight_floor, depth_cap)

    # Best-first growth when a leaf budget is set; depth-first otherwise.
    if leaf_cap < 0:
        builder = DepthFirstTreeBuilder(
            *shared_head,
            self.min_impurity_decrease,
            self.store_leaf_values,
        )
    else:
        builder = BestFirstTreeBuilder(
            *shared_head,
            leaf_cap,
            self.min_impurity_decrease,
            self.store_leaf_values,
        )

    builder.initialize_node_queue(self.tree_, X, y, sample_weight)
    builder.build(self.tree_, X, y, sample_weight)

    self._prune_tree()
    return self


class ObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor):
"""An oblique decision tree Regressor.
Expand Down Expand Up @@ -1785,6 +1859,7 @@ def _build_tree(
)

monotonic_cst = None
self.monotonic_cst_ = monotonic_cst

# Build tree
criterion = self.criterion
Expand Down Expand Up @@ -1825,7 +1900,7 @@ def _build_tree(

# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
self.builder_ = DepthFirstTreeBuilder(
builder = DepthFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -1834,7 +1909,7 @@ def _build_tree(
self.min_impurity_decrease,
)
else:
self.builder_ = BestFirstTreeBuilder(
builder = BestFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -1844,7 +1919,7 @@ def _build_tree(
self.min_impurity_decrease,
)

self.builder_.build(self.tree_, X, y, sample_weight, None)
builder.build(self.tree_, X, y, sample_weight, None)

if self.n_outputs_ == 1:
self.n_classes_ = self.n_classes_[0]
Expand Down Expand Up @@ -2263,6 +2338,7 @@ def _build_tree(
)

monotonic_cst = None
self.monotonic_cst_ = monotonic_cst
n_samples = X.shape[0]

# Build tree
Expand Down Expand Up @@ -2692,6 +2768,7 @@ def _build_tree(
Controls the randomness of the estimator.
"""
monotonic_cst = None
self.monotonic_cst_ = monotonic_cst
_, n_features = X.shape

if self.feature_combinations is None:
Expand Down Expand Up @@ -2737,7 +2814,7 @@ def _build_tree(

# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
self.builder_ = DepthFirstTreeBuilder(
builder = DepthFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -2746,7 +2823,7 @@ def _build_tree(
self.min_impurity_decrease,
)
else:
self.builder_ = BestFirstTreeBuilder(
builder = BestFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -2756,7 +2833,7 @@ def _build_tree(
self.min_impurity_decrease,
)

self.builder_.build(self.tree_, X, y, sample_weight, None)
builder.build(self.tree_, X, y, sample_weight, None)

if self.n_outputs_ == 1:
self.n_classes_ = self.n_classes_[0]
Expand Down Expand Up @@ -3088,6 +3165,7 @@ def _build_tree(
Controls the randomness of the estimator.
"""
monotonic_cst = None
self.monotonic_cst_ = monotonic_cst
n_samples, n_features = X.shape

if self.feature_combinations is None:
Expand Down
4 changes: 3 additions & 1 deletion sktree/tree/_honest_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,8 +742,10 @@ def _inherit_estimator_attributes(self):
self.tree_ = self.estimator_.tree_

# XXX: scikit-learn trees do not store their builder, or min_samples_split_
self.builder_ = getattr(self.estimator_, "builder_", None)
self.min_samples_split_ = getattr(self.estimator_, "min_samples_split_", None)
self.min_samples_leaf_ = getattr(self.estimator_, "min_samples_leaf_", None)
self.min_weight_leaf_ = getattr(self.estimator_, "min_weight_leaf_", None)
self.monotonic_cst_ = getattr(self.estimator_, "monotonic_cst_", None)

def _empty_leaf_correction(self, proba, pos=0):
"""Leaves with empty posteriors are assigned values.
Expand Down
3 changes: 1 addition & 2 deletions sktree/tree/_marginal.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ import numpy as np
cimport numpy as cnp

from .._lib.sklearn.tree._tree cimport BaseTree, Node
from .._lib.sklearn.tree._utils cimport UINT32_t
from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t
from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t, uint32_t


cpdef apply_marginal_tree(
Expand Down
6 changes: 3 additions & 3 deletions sktree/tree/_marginal.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ cpdef apply_marginal_tree(
cdef intp_t n_marginals = marginal_indices.shape[0]

# sklearn_rand_r random number state
cdef UINT32_t rand_r_state = random_state.randint(0, RAND_R_MAX)
cdef uint32_t rand_r_state = random_state.randint(0, RAND_R_MAX)

# define a set of all marginal indices
cdef unordered_set[intp_t] marginal_indices_map
Expand Down Expand Up @@ -108,7 +108,7 @@ cdef inline cnp.ndarray _apply_dense_marginal(
unordered_set[intp_t] marginal_indices_map,
intp_t traversal_method,
unsigned char use_sample_weight,
UINT32_t* rand_r_state
uint32_t* rand_r_state
):
"""Finds the terminal region (=leaf node) for each sample in X.
Expand All @@ -131,7 +131,7 @@ cdef inline cnp.ndarray _apply_dense_marginal(
use_sample_weight : unsigned char
Whether or not to use the weighted number of samples
in each node.
rand_r_state : UINT32_t
rand_r_state : uint32_t
The random number state.
"""
# Extract input
Expand Down
83 changes: 80 additions & 3 deletions sktree/tree/_multiview.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import numpy as np
from scipy.sparse import issparse
from sklearn.utils import check_random_state
from sklearn.utils._param_validation import Interval, RealNotInt, StrOptions

from .._lib.sklearn.tree import DecisionTreeClassifier, _criterion
Expand Down Expand Up @@ -360,6 +361,7 @@ def _build_tree(
Controls the randomness of the estimator.
"""
monotonic_cst = None
self.monotonic_cst_ = monotonic_cst
_, n_features = X.shape

self.feature_combinations_ = 1
Expand Down Expand Up @@ -495,7 +497,7 @@ def _build_tree(

# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
self.builder_ = DepthFirstTreeBuilder(
builder = DepthFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -504,7 +506,7 @@ def _build_tree(
self.min_impurity_decrease,
)
else:
self.builder_ = BestFirstTreeBuilder(
builder = BestFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -514,12 +516,87 @@ def _build_tree(
self.min_impurity_decrease,
)

self.builder_.build(self.tree_, X, y, sample_weight, None)
builder.build(self.tree_, X, y, sample_weight, None)

if self.n_outputs_ == 1:
self.n_classes_ = self.n_classes_[0]
self.classes_ = self.classes_[0]

def _update_tree(self, X, y, sample_weight):
    """Grow the already-existing ``self.tree_`` further using ``(X, y)``.

    A fresh criterion, splitter and builder are assembled from the fitted
    ``*_`` attributes stored on the estimator (``min_samples_split_``,
    ``min_samples_leaf_``, ``min_weight_leaf_``, ``monotonic_cst_``,
    ``max_features_``, ``feature_combinations_``, ``feature_set_ends_``,
    ``n_feature_sets_``, ``max_features_per_set_``), since the builder itself
    is not kept on the instance and would not survive pickling.

    Parameters
    ----------
    X, y, sample_weight
        Forwarded unchanged to the builder's ``initialize_node_queue`` and
        ``build`` calls.

    Returns
    -------
    self
        The estimator, after the tree has been updated and pruned.

    Raises
    ------
    ValueError
        If ``X`` is a scipy sparse matrix.
    """
    # Translate the "no limit" settings into the values handed to the builder.
    if self.max_leaf_nodes is None:
        leaf_cap = -1
    else:
        leaf_cap = self.max_leaf_nodes
    split_floor = self.min_samples_split_
    leaf_floor = self.min_samples_leaf_
    weight_floor = self.min_weight_leaf_
    if self.max_depth is None:
        depth_cap = np.iinfo(np.int32).max
    else:
        depth_cap = self.max_depth

    cst = self.monotonic_cst_

    # A user-supplied criterion object is deep-copied so that any mutable
    # state it carries is not shared (e.g. across parallel fits); a criterion
    # given by name is instantiated from the classification registry.
    crit = self.criterion
    if isinstance(crit, BaseCriterion):
        crit = copy.deepcopy(crit)
    else:
        crit = CRITERIA_CLF[self.criterion](self.n_outputs_, self._n_classes_)

    rng = check_random_state(self.random_state)

    splitter = self.splitter
    if issparse(X):
        raise ValueError(
            "Sparse input is not supported for oblique trees. "
            "Please convert your data to a dense array."
        )
    registry = DENSE_SPLITTERS
    # Only build a splitter when one was not passed in ready-made.
    if not isinstance(splitter, ObliqueSplitter):
        splitter = registry[splitter](
            crit,
            self.max_features_,
            leaf_floor,
            weight_floor,
            rng,
            cst,
            self.feature_combinations_,
            self.feature_set_ends_,
            self.n_feature_sets_,
            self.max_features_per_set_,
        )

    # Leading positional arguments common to both builder types.
    shared_head = (splitter, split_floor, leaf_floor, weight_floor, depth_cap)

    # Best-first growth when a leaf budget is set; depth-first otherwise.
    if leaf_cap < 0:
        builder = DepthFirstTreeBuilder(
            *shared_head,
            self.min_impurity_decrease,
            self.store_leaf_values,
        )
    else:
        builder = BestFirstTreeBuilder(
            *shared_head,
            leaf_cap,
            self.min_impurity_decrease,
            self.store_leaf_values,
        )

    builder.initialize_node_queue(self.tree_, X, y, sample_weight)
    builder.build(self.tree_, X, y, sample_weight)

    self._prune_tree()
    return self

def fit(self, X, y, sample_weight=None, check_input=True, classes=None):
"""Build a decision tree classifier from the training set (X, y).
Expand Down
Loading

0 comments on commit 232747e

Please sign in to comment.