Skip to content

Commit

Permalink
TEST & DOC : add integration tests for CrossConformalRegressor, finis…
Browse files Browse the repository at this point in the history
…h related docstring for param cv (#544)
  • Loading branch information
Valentin-Laurent committed Dec 29, 2024
1 parent 65fe7e9 commit aa6e027
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 46 deletions.
154 changes: 114 additions & 40 deletions mapie_v1/integration_tests/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,29 @@

import numpy as np
import pytest
from numpy.random import RandomState
from sklearn.compose import TransformedTargetRegressor
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

from mapie_v1.regression import (
SplitConformalRegressor,
CrossConformalRegressor,
JackknifeAfterBootstrapRegressor,
ConformalizedQuantileRegressor
)
from mapiev0.regression import MapieRegressor as MapieRegressorV0 # noqa
from mapiev0.regression import MapieQuantileRegressor as MapieQuantileRegressorV0 # noqa
from mapie.conformity_scores import GammaConformityScore, \
AbsoluteConformityScore
from mapie_v1.regression import SplitConformalRegressor, \
CrossConformalRegressor

from mapiev0.regression import MapieRegressor as MapieRegressorV0 # noqa

from mapie_v1.conformity_scores.utils import \
check_and_select_split_conformity_score
from mapie_v1.integration_tests.utils import (filter_params,
train_test_split_shuffle)
from sklearn.model_selection import KFold
from sklearn.model_selection import LeaveOneOut, GroupKFold

RANDOM_STATE = 1
K_FOLDS = 3
N_BOOTSTRAPS = 30

X_toy = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1)
y_toy = np.array([5, 7, 9, 11, 13, 15])
X, y = make_regression(n_samples=500,
n_features=10,
noise=1.0,
Expand Down Expand Up @@ -57,7 +56,6 @@ def test_exact_interval_equality_split(
Test that the prediction intervals are exactly the same
between v0 and v1 models when using the same settings.
"""

X_train, X_conf, y_train, y_conf = train_test_split_shuffle(
X, y, test_size=test_size, random_state=RANDOM_STATE
)
Expand Down Expand Up @@ -92,8 +90,6 @@ def test_exact_interval_equality_split(
strategy_key=strategy_key,
v0_params=v0_params,
v1_params=v1_params,
k_folds=K_FOLDS,
random_state=RANDOM_STATE
)

if strategy_key == 'prefit':
Expand All @@ -117,14 +113,115 @@ def test_exact_interval_equality_split(
)


# Shared fixtures for the v0/v1 cross-conformal equivalence tests below.
# Small regression problem; targets are made strictly positive so the
# gamma conformity score (which requires y > 0) can be exercised.
X_cross, y_cross_signed = make_regression(
    n_samples=50,
    n_features=10,
    noise=1.0,
    random_state=RANDOM_STATE
)
y_cross = np.abs(y_cross_signed)
# Deterministic per-sample weights, seeded for reproducibility across runs.
sample_weight = RandomState(RANDOM_STATE).random(len(X_cross))
# Five contiguous groups of 10 samples each, consumed by the GroupKFold case.
groups = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10 + [4] * 10
# Wraps a linear model in a log/exp target transform so that predictions
# stay positive, as required by GammaConformityScore.
positive_predictor = TransformedTargetRegressor(
    regressor=LinearRegression(),
    func=lambda y_: np.log(y_ + 1),
    inverse_func=lambda X_: np.exp(X_) - 1
)

# Parametrization for the v0-vs-v1 cross-conformal equivalence test.
# Each entry pairs a v0 (MapieRegressorV0) configuration with the v1
# (CrossConformalRegressor) configuration that is expected to be strictly
# equivalent; filter_params() later routes each key to the right call.
params_test_cases_cross = [
    {
        # Case 1: absolute conformity score, integer cv (KFold of 4),
        # median aggregation, per-sample weights passed at fit time.
        "v0": {
            "alpha": 0.2,
            "conformity_score": AbsoluteConformityScore(),
            "cv": 4,
            "agg_function": "median",
            "ensemble": True,
            "method": "base",
            "sample_weight": sample_weight,
        },
        "v1": {
            "confidence_level": 0.8,
            "conformity_score": "absolute",
            "cv": 4,
            "aggregation_method": "median",
            "method": "base",
            "fit_params": {"sample_weight": sample_weight},
        }
    },
    {
        # Case 2: gamma conformity score (needs the positive predictor and
        # positive targets), leave-one-out cv, interval-width optimization
        # (v0 "optimize_beta" == v1 "minimize_interval_width").
        "v0": {
            "estimator": positive_predictor,
            "alpha": 0.5,
            "conformity_score": GammaConformityScore(),
            "cv": LeaveOneOut(),
            "method": "plus",
            "optimize_beta": True,
        },
        "v1": {
            "estimator": positive_predictor,
            "confidence_level": 0.5,
            "conformity_score": "gamma",
            "cv": LeaveOneOut(),
            "method": "plus",
            "minimize_interval_width": True,
        }
    },
    {
        # Case 3: group-aware cv with minmax aggregation; v1 relies on its
        # default confidence_level matching v0's alpha=0.1.
        "v0": {
            "alpha": 0.1,
            "cv": GroupKFold(),
            "groups": groups,
            "method": "minmax",
            "allow_infinite_bounds": True,
        },
        "v1": {
            "cv": GroupKFold(),
            "groups": groups,
            "method": "minmax",
            "allow_infinite_bounds": True,
        }
    },
]


@pytest.mark.parametrize("params_cross", params_test_cases_cross)
def test_intervals_and_predictions_exact_equality_cross(params_cross):
    """
    Check that a v0 MapieRegressor and a v1 CrossConformalRegressor,
    configured with equivalent parameters, produce exactly the same
    point predictions and prediction intervals.

    Parameters
    ----------
    params_cross : dict
        A {"v0": {...}, "v1": {...}} pair from params_test_cases_cross.
    """
    v0_params = params_cross["v0"]
    v1_params = params_cross["v1"]

    v0 = MapieRegressorV0(
        **filter_params(MapieRegressorV0.__init__, v0_params)
    )
    v1 = CrossConformalRegressor(
        **filter_params(CrossConformalRegressor.__init__, v1_params)
    )

    v0_fit_params = filter_params(v0.fit, v0_params)
    v1_fit_params = filter_params(v1.fit, v1_params)
    v1_conformalize_params = filter_params(v1.conformalize, v1_params)

    # v0 fits and conformalizes in one call; v1 splits the two steps.
    v0.fit(X_cross, y_cross, **v0_fit_params)
    v1.fit(X_cross, y_cross, **v1_fit_params)
    v1.conformalize(X_cross, y_cross, **v1_conformalize_params)

    v0_predict_params = filter_params(v0.predict, v0_params)
    v1_predict_params = filter_params(v1.predict, v1_params)
    v1_predict_set_params = filter_params(v1.predict_set, v1_params)

    v0_preds, v0_pred_intervals = v0.predict(X_cross, **v0_predict_params)
    # v0 returns intervals with a trailing per-alpha axis; each test case
    # uses a single alpha/confidence_level, so squeeze that axis.
    v0_pred_intervals = v0_pred_intervals[:, :, 0]
    v1_pred_intervals = v1.predict_set(X_cross, **v1_predict_set_params)
    v1_preds = v1.predict(X_cross, **v1_predict_params)

    # Bug fix: `assert np.equal(a, b)` raises ValueError on multi-element
    # arrays (ambiguous truth value) and never actually compares element-wise.
    # np.testing.assert_array_equal performs the intended exact comparison.
    np.testing.assert_array_equal(v0_preds, v1_preds)
    np.testing.assert_array_equal(v0_pred_intervals, v1_pred_intervals)


def initialize_models(
strategy_key,
v0_params: dict,
v1_params: dict,
k_folds=5,
random_state=42
):

if strategy_key == "prefit":
v0_params.update({"cv": "prefit"})
v0_params = filter_params(MapieRegressorV0.__init__, v0_params)
Expand All @@ -139,29 +236,6 @@ def initialize_models(
v0 = MapieRegressorV0(**v0_params)
v1 = SplitConformalRegressor(**v1_params)

elif strategy_key == "cv":
v0_params.update({"cv": KFold(n_splits=k_folds,
shuffle=True,
random_state=random_state)})
v0_params = filter_params(MapieRegressorV0.__init__, v0_params)
v1_params = filter_params(CrossConformalRegressor.__init__, v1_params)
v0 = MapieRegressorV0(**v0_params)
v1 = CrossConformalRegressor(cv=k_folds, **v1_params)

elif strategy_key == "jackknife":
v0_params.update({"cv": -1})
v0_params = filter_params(MapieRegressorV0.__init__, v0_params)
v1_params = filter_params(JackknifeAfterBootstrapRegressor.__init__,
v1_params)
v0 = MapieRegressorV0(**v0_params)
v1 = JackknifeAfterBootstrapRegressor(**v1_params)

elif strategy_key == "CQR":
v0_params = filter_params(MapieQuantileRegressorV0.__init__, v0_params)
v1_params = filter_params(SplitConformalRegressor.__init__, v1_params)
v0 = MapieQuantileRegressorV0(**v0_params)
v1 = ConformalizedQuantileRegressor(**v1_params)

else:
raise ValueError(f"Unknown strategy key: {strategy_key}")

Expand Down
14 changes: 8 additions & 6 deletions mapie_v1/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,12 +274,14 @@ class CrossConformalRegressor:
each fold models.
cv : Union[int, BaseCrossValidator], default=5
The cross-validation strategy used to compute conformity scores. If an
integer is passed, it is the number of folds for `KFold`
cross-validation. Alternatively, a BaseCrossValidator from scikit-learn
can be provided. Valid options:
TODO : reference here the valid options,
once the list has been be created during the implementation
The cross-validation strategy used to compute conformity scores.
Valid options:
- integer, to specify the number of folds
- any ``sklearn.model_selection.BaseCrossValidator`` suitable for
regression, or a custom cross-validator inheriting from it.
Main variants in the cross conformal setting are:
- ``sklearn.model_selection.KFold`` (vanilla cross conformal)
- ``sklearn.model_selection.LeaveOneOut`` (jackknife)
n_jobs : Optional[int], default=None
The number of jobs to run in parallel when applicable.
Expand Down

0 comments on commit aa6e027

Please sign in to comment.