Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Add get_params() to TargetEncoder #4588

Merged
merged 20 commits into from
Feb 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 28 additions & 12 deletions python/cuml/preprocessing/TargetEncoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ def __init__(self, n_folds=4, smooth=0, seed=42,
'customize'}:
msg = ("split_method should be either 'random'"
" or 'continuous' or 'interleaved', or 'customize'"
"got {0}.".format(self.split))
"got {0}.".format(split_method))
raise ValueError(msg)

self.n_folds = n_folds
self.seed = seed
self.smooth = smooth
self.split = split_method
self.split_method = split_method
self.y_col = '__TARGET__'
self.x_col = '__FEA__'
self.out_col = '__TARGET_ENCODE__'
Expand Down Expand Up @@ -139,12 +139,12 @@ def fit(self, x, y, fold_ids=None):
self : TargetEncoder
A fitted instance of itself to allow method chaining
"""
if self.split == 'customize' and fold_ids is None:
if self.split_method == 'customize' and fold_ids is None:
raise ValueError("`fold_ids` is required "
"since split_method is set to"
"'customize'.")
if fold_ids is not None and self.split != 'customize':
self.split == 'customize'
if fold_ids is not None and self.split_method != 'customize':
self.split_method == 'customize'
warnings.warn("split_method is set to 'customize'"
"since `fold_ids` are provided.")
if fold_ids is not None and len(fold_ids) != len(x):
Expand Down Expand Up @@ -279,26 +279,26 @@ def _make_y_column(self, y):

def _make_fold_column(self, len_train, fold_ids):
"""
Create a fold id column for each split_method
Create a fold id column for each split
"""

if self.split == 'random':
if self.split_method == 'random':
return cp.random.randint(0, self.n_folds, len_train)
elif self.split == 'continuous':
elif self.split_method == 'continuous':
return (cp.arange(len_train) /
(len_train/self.n_folds)) % self.n_folds
elif self.split == 'interleaved':
elif self.split_method == 'interleaved':
return cp.arange(len_train) % self.n_folds
elif self.split == 'customize':
elif self.split_method == 'customize':
if fold_ids is None:
raise ValueError("fold_ids can't be None"
"since split_method is set to"
"'customize'.")
return fold_ids
else:
msg = ("split should be either 'random'"
msg = ("split_method should be either 'random'"
" or 'continuous' or 'interleaved', "
"got {0}.".format(self.split))
"got {0}.".format(self.split_method))
raise ValueError(msg)

def _compute_output(self, df_sum, df_count, cols, y_col):
Expand Down Expand Up @@ -407,3 +407,19 @@ def _get_output_type(self, x):
or isinstance(x, pandas.Series):
return 'numpy'
return 'cupy'

def get_param_names(self):
return [
"n_folds", "smooth", "seed", "split_method",
]

def get_params(self):
"""
Returns a dict of all params owned by this class.
"""
params = dict()
variables = self.get_param_names()
for key in variables:
var_value = getattr(self, key, None)
params[key] = var_value
return params
Comment on lines +416 to +425
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be inheritable from the cuml.common.base.Base class.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the review. Yes, I tried inheriting base in the first place but I ended up making more changes than necessary to pass test_base.py. I'll try again in another PR.

13 changes: 13 additions & 0 deletions python/cuml/test/test_target_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,16 @@ def test_transform_with_index():

train_encoded = t_enc.transform(df[["a"]])
assert array_equal(train_encoded, ans)


def test_get_params():
params = {
'n_folds': 5,
'smooth': 1,
'seed': 49,
'split_method': 'customize'
}
encoder = TargetEncoder(**params)
p2 = encoder.get_params()
for k, v in params.items():
assert v == p2[k]