Skip to content

Commit

Permalink
Add warning about upcoming change of default value for alpha_min_ratio
Browse files Browse the repository at this point in the history
The default value of alpha_min_ratio will
depend on the sample size relative to the
number of features in 0.13.
If `n_samples > n_features`, the current
default value 0.0001 will be used.
If `n_samples <= n_features`, 0.01 will be
used instead.

See #41 (comment)
  • Loading branch information
sebp committed Apr 11, 2020
1 parent 289a5cd commit dfd645e
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 22 deletions.
15 changes: 14 additions & 1 deletion sksurv/linear_model/coxnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ class CoxnetSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
alpha (i.e. the smallest value for which all
coefficients are zero).
The default value of alpha_min_ratio will depend on the
sample size relative to the number of features in 0.13.
If `n_samples > n_features`, the current default value 0.0001
will be used. If `n_samples <= n_features`, 0.01 will be used instead.
l1_ratio : float, optional, default: 0.5
The ElasticNet mixing parameter, with ``0 < l1_ratio <= 1``.
For ``l1_ratio = 0`` the penalty is an L2 penalty.
Expand Down Expand Up @@ -110,7 +115,7 @@ class CoxnetSurvivalAnalysis(BaseEstimator, SurvivalAnalysisMixin):
Journal of statistical software. 2011 Mar;39(5):1.
"""

def __init__(self, n_alphas=100, alphas=None, alpha_min_ratio=0.0001, l1_ratio=0.5,
def __init__(self, n_alphas=100, alphas=None, alpha_min_ratio="warn", l1_ratio=0.5,
penalty_factor=None, normalize=False, copy_X=True,
tol=1e-7, max_iter=100000, verbose=False, fit_baseline_model=False):
self.n_alphas = n_alphas
Expand Down Expand Up @@ -198,6 +203,14 @@ def fit(self, X, y):
X, event_num, time = self._pre_fit(X, y)
create_path, alphas, penalty = self._check_params(X.shape[1])

if self.alpha_min_ratio == 'warn':
warnings.warn("The default value of alpha_min_ratio will depend on the "
"sample size relative to the number of features in 0.13. "
"If n_samples > n_features, the current default value 0.0001 "
"will be used. If n_samples < n_features, 0.01 will be used instead.",
FutureWarning)
self.alpha_min_ratio = 0.0001

coef, alphas, deviance_ratio, n_iter = call_fit_coxnet(
X, time, event_num, penalty, alphas, create_path,
self.alpha_min_ratio, self.l1_ratio, int(self.max_iter),
Expand Down
47 changes: 26 additions & 21 deletions tests/test_coxnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_example_1(self, make_example_coef):
0.00712686021728439, 0.00649372959803068, 0.00591684455801036, 0.00539120839498366, 0.00491226829996627,
0.00447587592297602])

x, coxnet = self._fit_example(l1_ratio=0.5)
x, coxnet = self._fit_example(alpha_min_ratio=0.0001, l1_ratio=0.5)
assert_array_almost_equal(expected_alphas, coxnet.alphas_)

expected_coef = make_example_coef(1)
Expand Down Expand Up @@ -139,7 +139,7 @@ def test_example_1_penalty_factor_1(self, make_example_coef):

pf = numpy.ones(30)
pf[4] = 0.125
x, coxnet = self._fit_example(l1_ratio=0.5, penalty_factor=pf)
x, coxnet = self._fit_example(alpha_min_ratio=0.0001, l1_ratio=0.5, penalty_factor=pf)

assert_array_almost_equal(expected_alphas, coxnet.alphas_)

Expand Down Expand Up @@ -184,7 +184,7 @@ def test_example_1_penalty_factor_2(self, make_example_coef):
pf[4] = 0.125
pf[10] = 1.25
pf[12] = 0.75
x, coxnet = self._fit_example(l1_ratio=0.5, penalty_factor=pf)
x, coxnet = self._fit_example(alpha_min_ratio=0.0001, l1_ratio=0.5, penalty_factor=pf)

assert_array_almost_equal(expected_alphas, coxnet.alphas_)

Expand Down Expand Up @@ -226,7 +226,7 @@ def test_example_1_unpenalized(self, make_example_coef):
pf = numpy.ones(30)
pf[0] = 0
pf[29] = 0
x, coxnet = self._fit_example(l1_ratio=0.5, penalty_factor=pf)
x, coxnet = self._fit_example(alpha_min_ratio=0.0001, l1_ratio=0.5, penalty_factor=pf)

assert_array_almost_equal(expected_alphas, coxnet.alphas_)

Expand Down Expand Up @@ -262,7 +262,7 @@ def test_example_2(self, make_example_coef):
0.00630443160807172, 0.00574436327975223, 0.00523404987810766, 0.00476907131258251, 0.004345400161284,
0.00395936678738022, 0.00360762755446149, 0.00328713586556131, 0.00299511577499092])

x, coxnet = self._fit_example(l1_ratio=0.9)
x, coxnet = self._fit_example(alpha_min_ratio=0.0001, l1_ratio=0.9)

assert_array_almost_equal(expected_alphas, coxnet.alphas_)

Expand Down Expand Up @@ -314,7 +314,7 @@ def test_example_2_normalize(self, make_example_coef):
8.71498712373515e-05, 7.94077168717352e-05, 7.23533541616886e-05, 6.59256815921643e-05,
6.00690257383086e-05, 5.47326590488896e-05])

x, coxnet = self._fit_example(l1_ratio=0.9, normalize=True)
x, coxnet = self._fit_example(alpha_min_ratio=0.0001, l1_ratio=0.9, normalize=True)

assert_array_almost_equal(expected_alphas, coxnet.alphas_)

Expand Down Expand Up @@ -357,7 +357,7 @@ def test_example_2_standardize(make_example_coef):
0.0039983693660421, 0.00364316525153066, 0.00331951649156886, 0.0030246197954287])

scaler = StandardScaler()
coxnet = CoxnetSurvivalAnalysis(l1_ratio=0.9)
coxnet = CoxnetSurvivalAnalysis(alpha_min_ratio=0.0001, l1_ratio=0.9)
pipe = Pipeline([("standardize", scaler),
("coxnet", coxnet)])
pipe.fit(x.values, y)
Expand All @@ -373,7 +373,7 @@ def test_example_2_standardize(make_example_coef):
def test_example_2_with_alpha(self, make_example_coef):
expected_alphas = numpy.array([0.45, 0.4, 0.35, 0.25, 0.1, 0.05, 0.001])

x, coxnet = self._fit_example(l1_ratio=0.9, alphas=expected_alphas, normalize=True)
x, coxnet = self._fit_example(alpha_min_ratio=0.0001, l1_ratio=0.9, alphas=expected_alphas, normalize=True)

assert_array_almost_equal(expected_alphas, coxnet.alphas_)

Expand Down Expand Up @@ -468,28 +468,28 @@ def test_max_iter(self):
with pytest.warns(ConvergenceWarning,
match=r'Optimization terminated early, you might want'
r' to increase the number of iterations \(max_iter=100\).'):
self._fit_example(l1_ratio=0.9, max_iter=100)
self._fit_example(alpha_min_ratio=0.0001, l1_ratio=0.9, max_iter=100)

@pytest.mark.parametrize('val', [0, -1, -1e-6, 1 + 1e-6, 1512, numpy.nan, numpy.infty])
def test_invalid_l1_ratio(self, val):
with pytest.raises(ValueError,
match=r"l1_ratio must be in interval \]0;1\]"):
self._fit_example(l1_ratio=val)
self._fit_example(alpha_min_ratio=0.0001, l1_ratio=val)

def test_invalid_tol(self, invalid_positive_int):
with pytest.raises(ValueError,
match="tolerance must be positive"):
self._fit_example(tol=invalid_positive_int)
self._fit_example(alpha_min_ratio=0.0001, tol=invalid_positive_int)

def test_invalid_max_iter(self, invalid_positive_int):
with pytest.raises(ValueError,
match="max_iter must be a positive integer"):
self._fit_example(max_iter=invalid_positive_int)
self._fit_example(alpha_min_ratio=0.0001, max_iter=invalid_positive_int)

def test_invalid_n_alphas(self, invalid_positive_int):
with pytest.raises(ValueError,
match="n_alphas must be a positive integer"):
self._fit_example(n_alphas=invalid_positive_int)
self._fit_example(alpha_min_ratio=0.0001, n_alphas=invalid_positive_int)

@pytest.mark.parametrize('length', [0, 1, 29, 31])
def test_invalid_penalty_factor_length(self, length):
Expand All @@ -498,27 +498,32 @@ def test_invalid_penalty_factor_length(self, length):

array = numpy.empty(length, dtype=float)
with pytest.raises(ValueError, match=msg):
self._fit_example(penalty_factor=array)
self._fit_example(alpha_min_ratio=0.0001, penalty_factor=array)

def test_negative_penalty_factor_value(self, negative_float_array):
with pytest.raises(ValueError,
match="Negative values in data passed to penalty_factor"):
self._fit_example(penalty_factor=negative_float_array)
self._fit_example(alpha_min_ratio=0.0001, penalty_factor=negative_float_array)

def test_invalid_penalty_factor_value(self, infinite_float_array):
with pytest.raises(ValueError,
match="Input contains NaN, infinity or a value too large"):
self._fit_example(penalty_factor=infinite_float_array)
self._fit_example(alpha_min_ratio=0.0001, penalty_factor=infinite_float_array)

def test_negative_alphas(self, negative_float_array):
with pytest.raises(ValueError,
match="Negative values in data passed to alphas"):
self._fit_example(alphas=negative_float_array)
self._fit_example(alpha_min_ratio=0.0001, alphas=negative_float_array)

def test_invalid_alphas(self, infinite_float_array):
with pytest.raises(ValueError,
match="Input contains NaN, infinity or a value too large"):
self._fit_example(alphas=infinite_float_array)
self._fit_example(alpha_min_ratio=0.0001, alphas=infinite_float_array)

def test_alpha_min_ratio_future_warning(self):
with pytest.warns(FutureWarning,
match="The default value of alpha_min_ratio will depend "):
self._fit_example()

@staticmethod
def test_alpha_too_small():
Expand All @@ -534,7 +539,7 @@ def test_alpha_too_small():
184, 185, 186, 187, 188, 190, 191, 192, 193, 194, 195, 196, 197
])

nn = CoxnetSurvivalAnalysis(alphas=[0.007295025406624247], l1_ratio=1.0)
nn = CoxnetSurvivalAnalysis(alphas=[0.007295025406624247], alpha_min_ratio=0.0001, l1_ratio=1.0)
Xf, yf = Xt.iloc[index], y[index]

with pytest.raises(ArithmeticError,
Expand All @@ -546,7 +551,7 @@ def test_breast_example():
x, y = load_breast_cancer()
x = column.encode_categorical(x)

coxnet = CoxnetSurvivalAnalysis(l1_ratio=1.0)
coxnet = CoxnetSurvivalAnalysis(alpha_min_ratio=0.0001, l1_ratio=1.0)
coxnet.fit(x.values, y)

expected_alphas = numpy.array([
Expand Down Expand Up @@ -611,7 +616,7 @@ def test_simple():
"F3": [120, 98, 78, 91, 79],
"F4": [0.123, 0.541, 0.784, 0.846, 0.331]})

coxnet = CoxnetSurvivalAnalysis(l1_ratio=1.0)
coxnet = CoxnetSurvivalAnalysis(alpha_min_ratio=0.0001, l1_ratio=1.0)
coxnet.fit(x.values, y)

expected_alphas = numpy.array(
Expand Down

0 comments on commit dfd645e

Please sign in to comment.