feat: improve coherence with intercept clipping #5

Merged (2 commits) on Mar 29, 2024
README.md (23 changes: 12 additions & 11 deletions)
@@ -51,22 +51,23 @@ When the input data is a pandas DataFrame, the output is also a pandas DataFrame

| house_id | 0.025 | 0.05 | 0.1 | 0.9 | 0.95 | 0.975 |
|-----------:|--------:|-------:|-------:|-------:|-------:|--------:|
-| 1357 | 121557 | 130272 | 139913 | 189399 | 211177 | 237309 |
-| 2367 | 86005 | 92617 | 98591 | 130236 | 145686 | 164766 |
-| 2822 | 116523 | 121711 | 134993 | 175583 | 194964 | 216891 |
-| 2126 | 105712 | 113784 | 122145 | 164330 | 183352 | 206224 |
-| 1544 | 85920 | 92311 | 99130 | 133228 | 148895 | 167969 |
+| 1357 | 114783 | 120894 | 131618 | 175760 | 188051 | 205448 |
+| 2367 | 67416 | 80073 | 86753 | 117854 | 127582 | 142321 |
+| 2822 | 119422 | 132047 | 138724 | 178526 | 197246 | 214205 |
+| 2126 | 94030 | 99849 | 110891 | 150249 | 164703 | 182528 |
+| 1544 | 68996 | 81516 | 88231 | 121774 | 132425 | 147110 |
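For context (not part of this PR's diff): a minimal sketch of how a quantile table like the one above is produced. It assumes the `ConformalCoherentQuantileRegressor` wrapper and its `predict_quantiles` method described elsewhere in this README, and uses synthetic data as a stand-in for the housing dataset:

```python
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

from conformal_tights import ConformalCoherentQuantileRegressor

# Synthetic stand-in for the dataset used in this README.
X, y = make_regression(n_samples=1000, n_features=8, noise=10, random_state=42)
X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Wrap any scikit-learn compatible regressor and fit as usual.
conformal_predictor = ConformalCoherentQuantileRegressor(estimator=XGBRegressor())
conformal_predictor.fit(X_train, y_train)

# Because X_test is a DataFrame, the predicted quantiles come back as a DataFrame
# with one column per requested quantile and the same index as X_test.
ŷ_test_quantiles = conformal_predictor.predict_quantiles(
    X_test, quantiles=(0.025, 0.05, 0.1, 0.9, 0.95, 0.975)
)
```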

Let's visualize the predicted quantiles on the test set:

-<img src="https://github.com/radix-ai/conformal-tights/assets/4543654/b02b3797-de6a-4e0d-b457-ed8e50e3f42c" width="512">
+<img src="https://github.com/radix-ai/conformal-tights/assets/4543654/af22b3c8-7bc6-44fd-b96b-24e8d28bc1fd" width="512">

<details>
<summary>Expand to see the code that generated the graph above</summary>

```python
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

%config InlineBackend.figure_format = "retina"
plt.rcParams["font.size"] = 8
idx = (-ŷ_test.sample(50, random_state=42)).sort_values().index
# … (plotting code truncated in this diff view)
```

@@ -115,11 +116,11 @@ When the input data is a pandas DataFrame, the output is also a pandas DataFrame

| house_id | 0.025 | 0.975 |
|-----------:|--------:|--------:|
-| 1357 | 108489 | 238396 |
-| 2367 | 76043 | 165189 |
-| 2822 | 101319 | 220247 |
-| 2126 | 94238 | 207501 |
-| 1544 | 75976 | 168741 |
+| 1357 | 107202 | 206290 |
+| 2367 | 66665 | 146004 |
+| 2822 | 115591 | 220314 |
+| 2126 | 85288 | 183037 |
+| 1544 | 67889 | 150646 |
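For context (not part of this PR's diff): a two-column table like the one above corresponds to a two-sided prediction interval. A minimal sketch, assuming the package's `predict_interval` method with a `coverage` argument and continuing the variables from the earlier sketch:

```python
# Two-sided 95% coverage yields the 0.025 and 0.975 quantile columns shown above.
ŷ_test_interval = conformal_predictor.predict_interval(X_test, coverage=0.95)
```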

## Contributing

src/conformal_tights/_coherent_linear_quantile_regressor.py (30 changes: 24 additions & 6 deletions)
@@ -27,7 +27,7 @@ def coherent_linear_quantile_regression(
quantiles: FloatVector[F],
sample_weight: FloatVector[F] | None = None,
coherence_buffer: int = 3,
-) -> FloatMatrix[F]:
+) -> tuple[FloatMatrix[F], FloatMatrix[F]]:
"""Solve a Coherent Linear Quantile Regression problem.

Minimizes the quantile loss:
@@ -67,6 +67,8 @@ def coherent_linear_quantile_regression(
-------
β
The estimated regression coefficients so that Xβ produces quantile predictions ŷ.
β_full
The estimated regression coefficients including all auxiliary quantiles.
"""
# Learn the input dimensions.
num_samples, num_features = X.shape
@@ -170,10 +172,11 @@ def coherent_linear_quantile_regression(
# Solve the Coherent Quantile Regression LP.
result = linprog(c=c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds, method="highs")
# Extract the solution.
-β: FloatVector[F] = result.x[: num_quantiles * num_features].astype(y.dtype)
-β = β.reshape(num_quantiles, num_features).T
-β = β[:, 0 :: (coherence_buffer + 1)]  # Drop the buffer quantile ranks we introduced earlier.
-return β
+β_full: FloatMatrix[F] = result.x[: num_quantiles * num_features].astype(y.dtype)
+β_full = β_full.reshape(num_quantiles, num_features).T
+# Drop the buffer quantile ranks we introduced earlier.
+β = β_full[:, 0 :: (coherence_buffer + 1)]
+return β, β_full
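For context (not part of this PR's diff): a small illustrative sketch of the column layout that the stride slice above relies on. It assumes `β_full` interleaves `coherence_buffer` auxiliary quantile columns between consecutive requested quantiles, so keeping every `(coherence_buffer + 1)`-th column recovers exactly the requested ones:

```python
import numpy as np

# Illustrative column labels only: 3 requested quantiles with coherence_buffer = 3.
coherence_buffer = 3
requested = ["q=0.1", "q=0.5", "q=0.9"]
columns = []
for i, q in enumerate(requested):
    columns.append(q)
    if i < len(requested) - 1:
        # Auxiliary buffer quantiles inserted between consecutive requested quantiles.
        columns.extend(f"buffer_{i}_{k}" for k in range(coherence_buffer))
β_full_columns = np.array(columns)  # 9 columns in total

# The same stride slice as above drops the buffer columns again.
β_columns = β_full_columns[0 :: (coherence_buffer + 1)]
assert list(β_columns) == requested
```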


class CoherentLinearQuantileRegressor(RegressorMixin, BaseEstimator):
@@ -224,7 +227,7 @@ def fit(
if self.fit_intercept:
X = np.hstack([X, np.ones((X.shape[0], 1), dtype=X.dtype)])
# Fit the coherent quantile regression model.
-self.β_ = coherent_linear_quantile_regression(
+self.β_, self.β_full_ = coherent_linear_quantile_regression(
X,
y,
quantiles=np.asarray(self.quantiles),
@@ -246,3 +249,18 @@ def predict(self, X: FloatMatrix[F]) -> FloatMatrix[F]:
# Map back to the training target dtype.
ŷ = np.squeeze(ŷ.astype(self.y_dtype_), axis=1 if ŷ.shape[1] == 1 else ())
return ŷ

def intercept_clip(self, X: FloatMatrix[F], y: FloatVector[F]) -> FloatMatrix[F]:
"""Compute a clip for a delta on the intercept that retains quantile coherence."""
if self.fit_intercept:
X = np.hstack([X, np.ones((X.shape[0], 1), dtype=X.dtype)])
Q = X @ self.β_full_ - y[:, np.newaxis]
β_intercept_clip = np.vstack(
[
np.insert(np.max(Q[:, :-1] - Q[:, 1:], axis=0), 0, -np.inf),
np.append(np.min(Q[:, 1:] - Q[:, :-1], axis=0), np.inf),
]
)
β_intercept_clip[:, β_intercept_clip[0, :] >= β_intercept_clip[1, :]] = 0
β_intercept_clip = β_intercept_clip[:, 0 :: (self.coherence_buffer + 1)]
return β_intercept_clip
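For context (not part of this PR's diff): a standalone numeric illustration of the bounds that `intercept_clip` computes. `Q` holds each sample's signed error per quantile column, and shifting a single column's intercept by δ keeps adjacent columns from crossing only if δ stays between `max(Q[:, j-1] - Q[:, j])` and `min(Q[:, j+1] - Q[:, j])`:

```python
import numpy as np

# Toy setup: three coherent quantile columns predicted for three samples.
predictions = np.array(
    [
        [1.0, 2.0, 4.0],
        [0.5, 1.5, 2.0],
        [2.0, 3.5, 5.0],
    ]
)
y = np.array([2.0, 1.0, 3.0])
Q = predictions - y[:, np.newaxis]  # signed error per sample and quantile column

# Allowed intercept shift δ for the middle column alone, keeping the column order
# intact for every sample: max(Q[:, 0] - Q[:, 1]) ≤ δ ≤ min(Q[:, 2] - Q[:, 1]).
lower = np.max(Q[:, 0] - Q[:, 1])  # -1.0
upper = np.min(Q[:, 2] - Q[:, 1])  # 0.5
assert (lower, upper) == (-1.0, 0.5)
```

The first and last columns are unbounded on their outer side (hence the `-np.inf` and `np.inf` in the method above), degenerate intervals are zeroed out, and only the columns of the requested quantiles are kept.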
src/conformal_tights/_conformal_coherent_quantile_regressor.py (38 changes: 23 additions & 15 deletions)
@@ -122,12 +122,15 @@ def fit(
sample_weight_train, sample_weight_calib = (
sample_weights[:2] if sample_weight is not None else (None, None)
)
-# Split the conformal calibration set into two levels.
+# Split the conformal calibration set into two levels. If there would be fewer than 128
+# level 2 examples, use all of them for level 1 instead.
X_calib_l1, X_calib_l2, y_calib_l1, y_calib_l2, *sample_weights_calib = train_test_split(
self.X_calib_,
self.y_calib_,
*([sample_weight_calib] if sample_weight_calib is not None else []),
-test_size=self.conformal_calibration_size[0],
+test_size=self.conformal_calibration_size[0]
+if round(self.conformal_calibration_size[0] * X.shape[0]) >= 128  # noqa: PLR2004
+else 1,
random_state=self.random_state,
)
self.sample_weight_calib_l1_, self.sample_weight_calib_l2_ = (
@@ -195,24 +198,29 @@ def _lazily_fit_conformal_predictor(
# residuals.
eps = np.finfo(self.ŷ_calib_l1_.dtype).eps
abs_ŷ_calib_l1 = np.maximum(np.abs(self.ŷ_calib_l1_), eps)
-X_cqr = self.ŷ_calib_l1_nonconformity_
-y_cqr = self.residuals_calib_l1_ / (abs_ŷ_calib_l1 if "/ŷ" in target_type else 1)
+X_cqr_l1 = self.ŷ_calib_l1_nonconformity_
+y_cqr_l1 = -self.residuals_calib_l1_ / (abs_ŷ_calib_l1 if "/ŷ" in target_type else 1)
cqr_l1 = CoherentLinearQuantileRegressor(quantiles=quantiles)
-cqr_l1.fit(X_cqr, y_cqr, sample_weight=self.sample_weight_calib_l1_)
+cqr_l1.fit(X_cqr_l1, y_cqr_l1, sample_weight=self.sample_weight_calib_l1_)
self.conformal_l1_[target_type][quantiles_tuple] = cqr_l1
# Fit level 2: a per-quantile conformal bias on top of the level 1 conformal quantile
# predictions of the (relative) residuals.
-abs_ŷ_calib_l2 = np.maximum(np.abs(self.ŷ_calib_l2_), eps)
-Δŷ_calib_l2_quantiles = cqr_l1.predict(self.ŷ_calib_l2_nonconformity_)
-bias_l2 = np.empty(quantiles.shape, dtype=self.ŷ_calib_l1_.dtype)
-for j, quantile in enumerate(quantiles):
-    bias_l2[j] = np.quantile(
-        -(
-            (self.residuals_calib_l2_ / (abs_ŷ_calib_l2 if "/ŷ" in target_type else 1))
-            + Δŷ_calib_l2_quantiles[:, j]
-        ),
-        quantile,
-    )
+bias_l2 = np.zeros(quantiles.shape, dtype=self.ŷ_calib_l1_.dtype)
+if len(self.ŷ_calib_l2_) >= 128:  # noqa: PLR2004
+    abs_ŷ_calib_l2 = np.maximum(np.abs(self.ŷ_calib_l2_), eps)
+    X_cqr_l2 = self.ŷ_calib_l2_nonconformity_
+    y_cqr_l2 = -self.residuals_calib_l2_ / (
+        abs_ŷ_calib_l2 if "/ŷ" in target_type else 1
+    )
+    Δŷ_calib_l2_quantiles = cqr_l1.predict(X_cqr_l2)
+    intercept_clip = cqr_l1.intercept_clip(
+        np.vstack([X_cqr_l1, X_cqr_l2]), np.hstack([y_cqr_l1, y_cqr_l2])
+    )
+    for j, quantile in enumerate(quantiles):
+        # Clip the bias to retain quantile coherence.
+        # TODO: Use a weighted quantile.
+        intercept_l2 = np.quantile(y_cqr_l2 - Δŷ_calib_l2_quantiles[:, j], quantile)
+        bias_l2[j] = np.clip(intercept_l2, intercept_clip[0, j], intercept_clip[1, j])
self.conformal_l2_[target_type][quantiles_tuple] = bias_l2
return cqr_l1, bias_l2 # type: ignore[return-value]
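For context (not part of this PR's diff): the level 2 step in isolation, with made-up numbers. The per-quantile bias starts as an unconstrained empirical quantile of the level 2 scores and is then clipped into the coherence-preserving interval from `intercept_clip`:

```python
import numpy as np

rng = np.random.default_rng(0)
# Made-up level 2 scores for a single quantile column, already reduced by the level 1
# prediction (the analogue of y_cqr_l2 - Δŷ_calib_l2_quantiles[:, j] above).
scores = rng.normal(loc=0.3, scale=1.0, size=256)

intercept_l2 = np.quantile(scores, 0.9)  # unconstrained bias for the 0.9 quantile
lower, upper = -0.1, 0.4                 # illustrative bounds from intercept_clip
bias_l2_j = np.clip(intercept_l2, lower, upper)
```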

tests/test_conformal_quantile_regressor.py (6 changes: 4 additions & 2 deletions)
@@ -14,8 +14,10 @@

@pytest.fixture(
params=[
-pytest.param(XGBRegressor(objective="reg:absoluteerror"), id="model:XGBRegressor"),
-pytest.param(LGBMRegressor(objective="regression_l1"), id="model:LGBMRegressor"),
+pytest.param(XGBRegressor(objective="reg:absoluteerror"), id="model:XGBRegressor-L1"),
+pytest.param(XGBRegressor(objective="reg:squarederror"), id="model:XGBRegressor-L2"),
+pytest.param(LGBMRegressor(objective="regression_l1"), id="model:LGBMRegressor-L1"),
+pytest.param(LGBMRegressor(objective="regression_l2"), id="model:LGBMRegressor-L2"),
]
)
def regressor(request: SubRequest) -> BaseEstimator: