Skip to content

Commit

Permalink
Fix KBinsDiscretizer bin_edges_
Browse files (browse the repository at this point in the history)
  • Loading branch information
viclafargue committed May 16, 2022
1 parent 15aba09 commit cec1b57
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions python/cuml/_thirdparty/sklearn/preprocessing/_discretization.py
Original file line number Diff line number Diff line change
@@ -135,7 +135,7 @@ class KBinsDiscretizer(TransformerMixin,
"""

- bin_edges_ = CumlArrayDescriptor()
+ bin_edges_internal_ = CumlArrayDescriptor()
n_bins_ = CumlArrayDescriptor()

@_deprecate_pos_args(version="21.06")
@@ -234,7 +234,7 @@ def fit(self, X, y=None) -> "KBinsDiscretizer":
'decreasing the number of bins.' % jj)
n_bins[jj] = len(bin_edges[jj]) - 1

- self.bin_edges_ = bin_edges
+ self.bin_edges_internal_ = bin_edges
self.n_bins_ = n_bins

if 'onehot' in self.encode:
@@ -303,7 +303,7 @@ def transform(self, X) -> SparseCumlArray:
raise ValueError("Incorrect number of features. Expecting {}, "
"received {}.".format(n_features, Xt.shape[1]))

- bin_edges = self.bin_edges_
+ bin_edges = self.bin_edges_internal_
for jj in range(Xt.shape[1]):
# Values which are close to a bin edge are susceptible to numeric
# instability. Add eps to X so these values are binned correctly
@@ -353,9 +353,13 @@ def inverse_transform(self, Xt) -> SparseCumlArray:
"received {}.".format(n_features, Xinv.shape[1]))

for jj in range(n_features):
- bin_edges = self.bin_edges_[jj]
+ bin_edges = self.bin_edges_internal_[jj]
bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5
idxs = np.asnumpy(Xinv[:, jj])
Xinv[:, jj] = bin_centers[idxs.astype(np.int32)]

return Xinv

+ @property
+ def bin_edges_(self):
+ return self.bin_edges_internal_

0 comments on commit cec1b57

Please sign in to comment.