Add QN solver to ElasticNet and Lasso models #4576

Merged: 2 commits, Feb 15, 2022
59 changes: 52 additions & 7 deletions python/cuml/linear_model/elastic_net.pyx
@@ -16,11 +16,15 @@

# distutils: language = c++

from cuml.solvers import CD
from inspect import signature

from cuml.solvers import CD, QN
from cuml.common.base import Base
from cuml.common.mixins import RegressorMixin
from cuml.common.doc_utils import generate_docstring
from cuml.common.array import CumlArray
from cuml.common.array_descriptor import CumlArrayDescriptor
from cuml.common.logger import warn
from cuml.common.mixins import FMajorInputTagMixin
from cuml.linear_model.base import LinearPredictMixin

@@ -117,6 +121,14 @@ class ElasticNet(Base,
The tolerance for the optimization: if the updates are smaller than
tol, the optimization code checks the dual gap for optimality and
continues until it is smaller than tol.
solver : {'cd', 'qn'} (default='cd')
Choose an algorithm:

* 'cd' - coordinate descent
* 'qn' - quasi-Newton

The alternative 'qn' algorithm tends to be faster when the number of
features is large but the number of samples is small.
selection : {'cyclic', 'random'} (default='cyclic')
If set to ‘random’, a random coefficient is updated every iteration
rather than looping over features sequentially by default.
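For orientation, a minimal usage sketch of the new solver option follows (the random data and shapes are illustrative, not from the PR):

import numpy as np
from cuml.linear_model import ElasticNet

# Short-and-wide data: 'qn' tends to help when n_features is large
# relative to n_samples (illustrative shapes only).
X = np.random.rand(100, 2000).astype(np.float32)
y = np.random.rand(100).astype(np.float32)

enet = ElasticNet(alpha=0.1, l1_ratio=0.5, solver='qn')
enet.fit(X, y)
preds = enet.predict(X)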
@@ -154,7 +166,8 @@ class ElasticNet(Base,
coef_ = CumlArrayDescriptor()

def __init__(self, *, alpha=1.0, l1_ratio=0.5, fit_intercept=True,
normalize=False, max_iter=1000, tol=1e-3, selection='cyclic',
normalize=False, max_iter=1000, tol=1e-3,
solver='cd', selection='cyclic',
handle=None, output_type=None, verbose=False):
"""
Initializes the elastic-net regression class.
@@ -167,6 +180,7 @@ class ElasticNet(Base,
normalize: boolean.
max_iter: int
tol: float or double.
solver: str, 'cd' or 'qn'
selection : str, 'cyclic' or 'random'

For additional docs, see `scikit-learn's ElasticNet
@@ -184,6 +198,7 @@ class ElasticNet(Base,
self.alpha = alpha
self.l1_ratio = l1_ratio
self.fit_intercept = fit_intercept
self.solver = solver
self.normalize = normalize
self.max_iter = max_iter
self.tol = tol
@@ -200,11 +215,31 @@ class ElasticNet(Base,
if self.selection == 'random':
shuffle = True

self.solver_model = CD(fit_intercept=self.fit_intercept,
normalize=self.normalize, alpha=self.alpha,
l1_ratio=self.l1_ratio, shuffle=shuffle,
max_iter=self.max_iter, handle=self.handle,
tol=self.tol)
if solver == 'qn':
pams = signature(self.__init__).parameters
if (pams['selection'].default != selection):
warn("Parameter 'selection' has no effect "
"when 'qn' solver is used.")
if (pams['normalize'].default != normalize):
warn("Parameter 'normalize' has no effect "
"when 'qn' solver is used.")

self.solver_model = QN(
fit_intercept=self.fit_intercept,
l1_strength=self.alpha * self.l1_ratio,
l2_strength=self.alpha * (1.0 - self.l1_ratio),
max_iter=self.max_iter, handle=self.handle,
loss='l2', tol=self.tol, penalty_normalized=False,
verbose=self.verbose)
elif solver == 'cd':
self.solver_model = CD(
fit_intercept=self.fit_intercept,
normalize=self.normalize, alpha=self.alpha,
l1_ratio=self.l1_ratio, shuffle=shuffle,
max_iter=self.max_iter, handle=self.handle,
tol=self.tol)
else:
raise TypeError(f"solver {solver} is not supported")

def _check_alpha(self, alpha):
if alpha <= 0.0:
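A note on the QN parameterization in this hunk: the elastic-net penalty alpha * (l1_ratio * ||w||_1 + 0.5 * (1 - l1_ratio) * ||w||_2^2) is split into the two strengths passed to QN. A NumPy sanity check of that split (assuming QN applies the conventional 0.5 factor to its L2 term):

import numpy as np

alpha, l1_ratio = 0.5, 0.3
w = np.array([0.2, -1.0, 0.7])

# Penalty as written in the elastic-net objective.
enet_penalty = alpha * (l1_ratio * np.abs(w).sum()
                        + 0.5 * (1.0 - l1_ratio) * (w ** 2).sum())

# The same penalty expressed via the strengths handed to QN above.
l1_strength = alpha * l1_ratio
l2_strength = alpha * (1.0 - l1_ratio)
qn_penalty = l1_strength * np.abs(w).sum() + 0.5 * l2_strength * (w ** 2).sum()

assert np.isclose(enet_penalty, qn_penalty)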
@@ -223,6 +258,15 @@ class ElasticNet(Base,

"""
self.solver_model.fit(X, y, convert_dtype=convert_dtype)
if isinstance(self.solver_model, QN):
self.coef_ = CumlArray(
data=self.solver_model.coef_,
index=self.solver_model.coef_._index,
dtype=self.solver_model.coef_.dtype,
order=self.solver_model.coef_.order,
shape=(self.solver_model.coef_.shape[0],)
)
self.intercept_ = self.solver_model.intercept_.item()

return self
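Why fit rebuilds coef_ above: the QN solver appears to store its coefficients as a column for the single-target case (hence the shape=(coef_.shape[0],) argument that flattens it), while ElasticNet exposes a flat (n_features,) vector. A shape-only sketch, with CuPy standing in for the device array (an assumption, for illustration):

import cupy as cp

qn_coef = cp.zeros((20, 1), dtype=cp.float32)  # column shape, as stored by QN
flat = qn_coef.reshape(20,)                    # flat shape exposed as coef_
assert flat.shape == (20,)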

@@ -242,5 +286,6 @@ class ElasticNet(Base,
"normalize",
"max_iter",
"tol",
"solver",
"selection",
]
python/cuml/linear_model/lasso.pyx
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,20 +14,10 @@
# limitations under the License.
#

# distutils: language = c++
from cuml.linear_model.elastic_net import ElasticNet

from cuml.solvers import CD
from cuml.common.base import Base
from cuml.common.mixins import RegressorMixin
from cuml.common.doc_utils import generate_docstring
from cuml.common.mixins import FMajorInputTagMixin
from cuml.linear_model.base import LinearPredictMixin


class Lasso(Base,
LinearPredictMixin,
RegressorMixin,
FMajorInputTagMixin):
class Lasso(ElasticNet):

"""
Lasso extends LinearRegression by providing L1 regularization on the
@@ -92,23 +82,34 @@ class Lasso(Base,
alpha : float (default = 1.0)
Constant that multiplies the L1 term.
alpha = 0 is equivalent to an ordinary least square, solved by the
LinearRegression class.
For numerical reasons, using alpha = 0 with the Lasso class is not
LinearRegression object.
For numerical reasons, using alpha = 0 with the Lasso object is not
advised.
Given this, you should use the LinearRegression class.
Given this, you should use the LinearRegression object.
fit_intercept : boolean (default = True)
If True, Lasso tries to correct for the global mean of y.
If False, the model expects that you have centered the data.
normalize : boolean (default = False)
If True, the predictors in X will be normalized by dividing by it's L2
norm.
If True, the predictors in X will be normalized by dividing by the
column-wise standard deviation.
If False, no scaling will be done.
max_iter : int
Note: this is in contrast to sklearn's deprecated `normalize` flag,
which divides by the column-wise L2 norm; but this is the same as if
using sklearn's StandardScaler.
max_iter : int (default = 1000)
The maximum number of iterations
tol : float (default = 1e-3)
The tolerance for the optimization: if the updates are smaller than
tol, the optimization code checks the dual gap for optimality and
continues until it is smaller than tol.
solver : {'cd', 'qn'} (default='cd')
Choose an algorithm:

* 'cd' - coordinate descent
* 'qn' - quasi-Newton

The alternative 'qn' algorithm tends to be faster when the number of
features is large but the number of samples is small.
selection : {'cyclic', 'random'} (default='cyclic')
If set to ‘random’, a random coefficient is updated every iteration
rather than looping over features sequentially by default.
@@ -143,69 +144,16 @@ class Lasso(Base,
<https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html>`_.
"""

def __init__(self, *, alpha=1.0, fit_intercept=True, normalize=False,
max_iter=1000, tol=1e-3, selection='cyclic', handle=None,
output_type=None, verbose=False):

# Hard-code verbosity as CoordinateDescent does not have verbosity
super().__init__(handle=handle,
verbose=verbose,
output_type=output_type)

self._check_alpha(alpha)
self.alpha = alpha
self.fit_intercept = fit_intercept
self.normalize = normalize
self.max_iter = max_iter
self.tol = tol
self.solver_model = None
if selection in ['cyclic', 'random']:
self.selection = selection
else:
msg = "selection {!r} is not supported"
raise TypeError(msg.format(selection))

self.intercept_value = 0.0

shuffle = False
if self.selection == 'random':
shuffle = True

self.solver_model = CD(fit_intercept=self.fit_intercept,
normalize=self.normalize, alpha=self.alpha,
l1_ratio=1.0, shuffle=shuffle,
max_iter=self.max_iter, handle=self.handle,
tol=self.tol)

def _check_alpha(self, alpha):
if alpha <= 0.0:
msg = "alpha value has to be positive"
raise ValueError(msg.format(alpha))

def set_params(self, **params):
super().set_params(**params)
if 'selection' in params:
params.pop('selection')
params['shuffle'] = self.selection == 'random'
self.solver_model.set_params(**params)
return self

@generate_docstring()
def fit(self, X, y, convert_dtype=True) -> "Lasso":
"""
Fit the model with X and y.

"""
self.solver_model.fit(X, y, convert_dtype=convert_dtype)

return self
def __init__(self, *, alpha=1.0, fit_intercept=True,
normalize=False, max_iter=1000, tol=1e-3,
solver='cd', selection='cyclic',
handle=None, output_type=None, verbose=False):
# Lasso is just a special case of ElasticNet
super().__init__(
l1_ratio=1.0, alpha=alpha, fit_intercept=fit_intercept,
normalize=normalize, max_iter=max_iter, tol=tol,
solver=solver, selection=selection,
handle=handle, output_type=output_type, verbose=verbose)

def get_param_names(self):
return super().get_param_names() + [
"alpha",
"fit_intercept",
"normalize",
"max_iter",
"tol",
"selection",
]
return list(set(super().get_param_names()) - {'l1_ratio'})
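Since Lasso now delegates to ElasticNet with l1_ratio pinned at 1.0, the two spellings below should fit identical models (a sketch with illustrative random data):

import numpy as np
from cuml.linear_model import ElasticNet, Lasso

X = np.random.rand(50, 10).astype(np.float32)
y = np.random.rand(50).astype(np.float32)

lasso = Lasso(alpha=0.1, solver='qn').fit(X, y)
enet = ElasticNet(alpha=0.1, l1_ratio=1.0, solver='qn').fit(X, y)

assert np.allclose(lasso.coef_, enet.coef_)  # same model, two spellings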
37 changes: 36 additions & 1 deletion python/cuml/test/test_linear_model.py
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
import pytest
from distutils.version import LooseVersion
import cudf
from cuml import ElasticNet as cuElasticNet
from cuml import LinearRegression as cuLinearRegression
from cuml import LogisticRegression as cuLog
from cuml import Ridge as cuRidge
@@ -671,3 +672,37 @@ def test_linear_models_set_params(algo):

assert not array_equal(coef_before, coef_after)
assert array_equal(coef_after, coef_test)


@pytest.mark.parametrize("datatype", [np.float32, np.float64])
@pytest.mark.parametrize("alpha", [0.1, 1.0, 10.0])
@pytest.mark.parametrize("l1_ratio", [0.1, 0.5, 0.9])
@pytest.mark.parametrize(
"nrows", [unit_param(1000), quality_param(5000), stress_param(500000)]
)
@pytest.mark.parametrize(
"column_info",
[
unit_param([20, 10]),
quality_param([100, 50]),
stress_param([1000, 500])
],
)
def test_elasticnet_solvers_eq(datatype, alpha, l1_ratio, nrows, column_info):

ncols, n_info = column_info
X_train, X_test, y_train, y_test = make_regression_dataset(
datatype, nrows, ncols, n_info
)

kwargs = {'alpha': alpha, 'l1_ratio': l1_ratio}
cd = cuElasticNet(solver='cd', **kwargs)
cd.fit(X_train, y_train)
cd_res = cd.predict(X_test)

qn = cuElasticNet(solver='qn', **kwargs)
qn.fit(X_train, y_train)
# the results of the two models should be close (even if both are bad)
assert qn.score(X_test, cd_res) > 0.95
# coefficients of the two models should be close
assert np.corrcoef(cd.coef_, qn.coef_)[0, 1] > 0.98
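The new test checks the two GPU solvers against each other. A further cross-check against scikit-learn's CPU implementation (not part of this PR; sketched here reusing the test's variable names and an illustrative 0.9 threshold) could look like:

from sklearn.linear_model import ElasticNet as skElasticNet

sk = skElasticNet(alpha=alpha, l1_ratio=l1_ratio)
sk.fit(X_train, y_train)
sk_res = sk.predict(X_test)

# All three solvers should land on similar predictions.
assert qn.score(X_test, sk_res) > 0.9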