Skip to content

Commit

Permalink
feat: improve coherence with intercept clipping (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
lsorber authored Mar 29, 2024
1 parent be17d26 commit d4830b9
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 34 deletions.
23 changes: 12 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,22 +51,23 @@ When the input data is a pandas DataFrame, the output is also a pandas DataFrame

| house_id | 0.025 | 0.05 | 0.1 | 0.9 | 0.95 | 0.975 |
|-----------:|--------:|-------:|-------:|-------:|-------:|--------:|
| 1357 | 121557 | 130272 | 139913 | 189399 | 211177 | 237309 |
| 2367 | 86005 | 92617 | 98591 | 130236 | 145686 | 164766 |
| 2822 | 116523 | 121711 | 134993 | 175583 | 194964 | 216891 |
| 2126 | 105712 | 113784 | 122145 | 164330 | 183352 | 206224 |
| 1544 | 85920 | 92311 | 99130 | 133228 | 148895 | 167969 |
| 1357 | 114783 | 120894 | 131618 | 175760 | 188051 | 205448 |
| 2367 | 67416 | 80073 | 86753 | 117854 | 127582 | 142321 |
| 2822 | 119422 | 132047 | 138724 | 178526 | 197246 | 214205 |
| 2126 | 94030 | 99849 | 110891 | 150249 | 164703 | 182528 |
| 1544 | 68996 | 81516 | 88231 | 121774 | 132425 | 147110 |

Let's visualize the predicted quantiles on the test set:

<img src="https://github.com/radix-ai/conformal-tights/assets/4543654/b02b3797-de6a-4e0d-b457-ed8e50e3f42c" width="512">
<img src="https://github.com/radix-ai/conformal-tights/assets/4543654/af22b3c8-7bc6-44fd-b96b-24e8d28bc1fd" width="512">

<details>
<summary>Expand to see the code that generated the graph above</summary>

```python
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

%config InlineBackend.figure_format = "retina"
plt.rcParams["font.size"] = 8
idx = (-ŷ_test.sample(50, random_state=42)).sort_values().index
Expand Down Expand Up @@ -115,11 +116,11 @@ When the input data is a pandas DataFrame, the output is also a pandas DataFrame

| house_id | 0.025 | 0.975 |
|-----------:|--------:|--------:|
| 1357 | 108489 | 238396 |
| 2367 | 76043 | 165189 |
| 2822 | 101319 | 220247 |
| 2126 | 94238 | 207501 |
| 1544 | 75976 | 168741 |
| 1357 | 107202 | 206290 |
| 2367 | 66665 | 146004 |
| 2822 | 115591 | 220314 |
| 2126 | 85288 | 183037 |
| 1544 | 67889 | 150646 |

## Contributing

Expand Down
30 changes: 24 additions & 6 deletions src/conformal_tights/_coherent_linear_quantile_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def coherent_linear_quantile_regression(
quantiles: FloatVector[F],
sample_weight: FloatVector[F] | None = None,
coherence_buffer: int = 3,
) -> FloatMatrix[F]:
) -> tuple[FloatMatrix[F], FloatMatrix[F]]:
"""Solve a Coherent Linear Quantile Regression problem.
Minimizes the quantile loss:
Expand Down Expand Up @@ -67,6 +67,8 @@ def coherent_linear_quantile_regression(
-------
β
The estimated regression coefficients so that Xβ produces quantile predictions ŷ.
β_full
The estimated regression coefficients including all auxiliary quantiles.
"""
# Learn the input dimensions.
num_samples, num_features = X.shape
Expand Down Expand Up @@ -170,10 +172,11 @@ def coherent_linear_quantile_regression(
# Solve the Coherent Quantile Regression LP.
result = linprog(c=c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds, method="highs")
# Extract the solution.
β: FloatVector[F] = result.x[: num_quantiles * num_features].astype(y.dtype)
β = β.reshape(num_quantiles, num_features).T
β = β[:, 0 :: (coherence_buffer + 1)] # Drop the buffer quantile ranks we introduced earlier.
return β
β_full: FloatMatrix[F] = result.x[: num_quantiles * num_features].astype(y.dtype)
β_full = β_full.reshape(num_quantiles, num_features).T
# Drop the buffer quantile ranks we introduced earlier.
β = β_full[:, 0 :: (coherence_buffer + 1)]
return β, β_full


class CoherentLinearQuantileRegressor(RegressorMixin, BaseEstimator):
Expand Down Expand Up @@ -224,7 +227,7 @@ def fit(
if self.fit_intercept:
X = np.hstack([X, np.ones((X.shape[0], 1), dtype=X.dtype)])
# Fit the coherent quantile regression model.
self.β_ = coherent_linear_quantile_regression(
self.β_, self.β_full_ = coherent_linear_quantile_regression(
X,
y,
quantiles=np.asarray(self.quantiles),
Expand All @@ -246,3 +249,18 @@ def predict(self, X: FloatMatrix[F]) -> FloatMatrix[F]:
# Map back to the training target dtype.
ŷ = np.squeeze(ŷ.astype(self.y_dtype_), axis=1 if ŷ.shape[1] == 1 else ())
return ŷ

def intercept_clip(self, X: FloatMatrix[F], y: FloatVector[F]) -> FloatMatrix[F]:
"""Compute a clip for a delta on the intercept that retains quantile coherence."""
if self.fit_intercept:
X = np.hstack([X, np.ones((X.shape[0], 1), dtype=X.dtype)])
Q = X @ self.β_full_ - y[:, np.newaxis]
β_intercept_clip = np.vstack(
[
np.insert(np.max(Q[:, :-1] - Q[:, 1:], axis=0), 0, -np.inf),
np.append(np.min(Q[:, 1:] - Q[:, :-1], axis=0), np.inf),
]
)
β_intercept_clip[:, β_intercept_clip[0, :] >= β_intercept_clip[1, :]] = 0
β_intercept_clip = β_intercept_clip[:, 0 :: (self.coherence_buffer + 1)]
return β_intercept_clip
38 changes: 23 additions & 15 deletions src/conformal_tights/_conformal_coherent_quantile_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,15 @@ def fit(
sample_weight_train, sample_weight_calib = (
sample_weights[:2] if sample_weight is not None else (None, None)
)
# Split the conformal calibration set into two levels.
# Split the conformal calibration set into two levels. If there would be fewer than
# 128 level-2 examples, use all of them for level 1 instead.
X_calib_l1, X_calib_l2, y_calib_l1, y_calib_l2, *sample_weights_calib = train_test_split(
self.X_calib_,
self.y_calib_,
*([sample_weight_calib] if sample_weight_calib is not None else []),
test_size=self.conformal_calibration_size[0],
test_size=self.conformal_calibration_size[0]
if round(self.conformal_calibration_size[0] * X.shape[0]) >= 128 # noqa: PLR2004
else 1,
random_state=self.random_state,
)
self.sample_weight_calib_l1_, self.sample_weight_calib_l2_ = (
Expand Down Expand Up @@ -195,24 +198,29 @@ def _lazily_fit_conformal_predictor(
# residuals.
eps = np.finfo(self.ŷ_calib_l1_.dtype).eps
abs_ŷ_calib_l1 = np.maximum(np.abs(self.ŷ_calib_l1_), eps)
X_cqr = self.ŷ_calib_l1_nonconformity_
y_cqr = self.residuals_calib_l1_ / (abs_ŷ_calib_l1 if "/ŷ" in target_type else 1)
X_cqr_l1 = self.ŷ_calib_l1_nonconformity_
y_cqr_l1 = -self.residuals_calib_l1_ / (abs_ŷ_calib_l1 if "/ŷ" in target_type else 1)
cqr_l1 = CoherentLinearQuantileRegressor(quantiles=quantiles)
cqr_l1.fit(X_cqr, y_cqr, sample_weight=self.sample_weight_calib_l1_)
cqr_l1.fit(X_cqr_l1, y_cqr_l1, sample_weight=self.sample_weight_calib_l1_)
self.conformal_l1_[target_type][quantiles_tuple] = cqr_l1
# Fit level 2: a per-quantile conformal bias on top of the level 1 conformal quantile
# predictions of the (relative) residuals.
abs_ŷ_calib_l2 = np.maximum(np.abs(self.ŷ_calib_l2_), eps)
Δŷ_calib_l2_quantiles = cqr_l1.predict(self.ŷ_calib_l2_nonconformity_)
bias_l2 = np.empty(quantiles.shape, dtype=self.ŷ_calib_l1_.dtype)
for j, quantile in enumerate(quantiles):
bias_l2[j] = np.quantile(
-(
(self.residuals_calib_l2_ / (abs_ŷ_calib_l2 if "/ŷ" in target_type else 1))
+ Δŷ_calib_l2_quantiles[:, j]
),
quantile,
bias_l2 = np.zeros(quantiles.shape, dtype=self.ŷ_calib_l1_.dtype)
if len(self.ŷ_calib_l2_) >= 128: # noqa: PLR2004
abs_ŷ_calib_l2 = np.maximum(np.abs(self.ŷ_calib_l2_), eps)
X_cqr_l2 = self.ŷ_calib_l2_nonconformity_
y_cqr_l2 = -self.residuals_calib_l2_ / (
abs_ŷ_calib_l2 if "/ŷ" in target_type else 1
)
Δŷ_calib_l2_quantiles = cqr_l1.predict(X_cqr_l2)
intercept_clip = cqr_l1.intercept_clip(
np.vstack([X_cqr_l1, X_cqr_l2]), np.hstack([y_cqr_l1, y_cqr_l2])
)
for j, quantile in enumerate(quantiles):
# Clip the bias to retain quantile coherence.
# TODO: Use a weighted quantile.
intercept_l2 = np.quantile(y_cqr_l2 - Δŷ_calib_l2_quantiles[:, j], quantile)
bias_l2[j] = np.clip(intercept_l2, intercept_clip[0, j], intercept_clip[1, j])
self.conformal_l2_[target_type][quantiles_tuple] = bias_l2
return cqr_l1, bias_l2 # type: ignore[return-value]

Expand Down
6 changes: 4 additions & 2 deletions tests/test_conformal_quantile_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@

@pytest.fixture(
params=[
pytest.param(XGBRegressor(objective="reg:absoluteerror"), id="model:XGBRegressor"),
pytest.param(LGBMRegressor(objective="regression_l1"), id="model:LGBMRegressor"),
pytest.param(XGBRegressor(objective="reg:absoluteerror"), id="model:XGBRegressor-L1"),
pytest.param(XGBRegressor(objective="reg:squarederror"), id="model:XGBRegressor-L2"),
pytest.param(LGBMRegressor(objective="regression_l1"), id="model:LGBMRegressor-L1"),
pytest.param(LGBMRegressor(objective="regression_l2"), id="model:LGBMRegressor-L2"),
]
)
def regressor(request: SubRequest) -> BaseEstimator:
Expand Down

0 comments on commit d4830b9

Please sign in to comment.