Skip to content

Commit

Permalink
feat: add Darts forecaster (#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
lsorber authored Apr 14, 2024
1 parent 157698d commit fdef314
Show file tree
Hide file tree
Showing 12 changed files with 3,624 additions and 560 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ jobs:
uses: codecov/codecov-action@v4
with:
files: reports/coverage.xml
token: ${{ secrets.CODECOV_TOKEN }}
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ COPY --chown=user:user poetry.lock* pyproject.toml /workspaces/conformal-tights/
RUN mkdir -p /home/user/.cache/pypoetry/ && mkdir -p /home/user/.config/pypoetry/ && \
mkdir -p src/conformal_tights/ && touch src/conformal_tights/__init__.py && touch README.md
RUN --mount=type=cache,uid=$UID,gid=$GID,target=/home/user/.cache/pypoetry/ \
poetry install --only main --no-interaction
poetry install --only main --all-extras --no-interaction



Expand All @@ -74,7 +74,7 @@ USER user

# Install the development Python dependencies in the virtual environment.
RUN --mount=type=cache,uid=$UID,gid=$GID,target=/home/user/.cache/pypoetry/ \
poetry install --no-interaction
poetry install --all-extras --no-interaction

# Persist output generated during docker build so that we can restore it in the dev container.
COPY --chown=user:user .pre-commit-config.yaml /workspaces/conformal-tights/
Expand Down
247 changes: 188 additions & 59 deletions README.md

Large diffs are not rendered by default.

695 changes: 695 additions & 0 deletions notebooks/README.ipynb

Large diffs are not rendered by default.

2,917 changes: 2,437 additions & 480 deletions poetry.lock

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ numpy = ">=1.22.0"
scikit-learn = ">=1.0.0"
scipy = ">=1.5.0"
xgboost = ">=2.0.0"
darts = { version = ">=0.25.0", optional = true }

[tool.poetry.extras] # https://python-poetry.org/docs/pyproject/#extras
darts = ["darts"]

[tool.poetry.group.test.dependencies] # https://python-poetry.org/docs/master/managing-dependencies/
commitizen = ">=3.2.1"
Expand All @@ -43,6 +47,7 @@ shellcheck-py = ">=0.9.0"
cruft = ">=2.14.0"
ipykernel = ">=6.29.2"
matplotlib = ">=3.8.3"
nbconvert = ">=7.16.3"
pdoc = ">=13.1.1"
tabulate = ">=0.9.0"

Expand Down Expand Up @@ -102,6 +107,17 @@ convention = "numpy"

[tool.poe.tasks] # https://github.com/nat-n/poethepoet

[tool.poe.tasks.readme]
help = "Generate this package's README"
cmd = """
jupyter nbconvert notebooks/README.ipynb
--to markdown
--TagRemovePreprocessor.enabled=True
--TagRemovePreprocessor.remove_cell_tags remove_cell
--TagRemovePreprocessor.remove_all_outputs_tags remove_output
--TagRemovePreprocessor.remove_input_tags remove_input
"""

[tool.poe.tasks.docs]
help = "Generate this package's docs"
cmd = """
Expand Down
3 changes: 2 additions & 1 deletion src/conformal_tights/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
from conformal_tights._conformal_coherent_quantile_regressor import (
ConformalCoherentQuantileRegressor,
)
from conformal_tights._darts_forecaster import DartsForecaster

__all__ = ["ConformalCoherentQuantileRegressor"]
__all__ = ["ConformalCoherentQuantileRegressor", "DartsForecaster"]
215 changes: 215 additions & 0 deletions src/conformal_tights/_darts_forecaster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
"""Darts Forecaster."""

from collections.abc import Sequence
from typing import TYPE_CHECKING, Any, TypeVar

import numpy as np
import numpy.typing as npt
from sklearn.utils import check_random_state

if TYPE_CHECKING:
import pandas as pd
else:
try:
import pandas as pd
except ImportError:
pd = None

if TYPE_CHECKING:
from darts import TimeSeries
from darts.models.forecasting.regression_model import (
FUTURE_LAGS_TYPE,
LAGS_TYPE,
RegressionModel,
RegressionModelWithCategoricalCovariates,
_LikelihoodMixin,
)
else:
try:
from darts import TimeSeries
from darts.models.forecasting.regression_model import (
FUTURE_LAGS_TYPE,
LAGS_TYPE,
RegressionModel,
RegressionModelWithCategoricalCovariates,
_LikelihoodMixin,
)
except ImportError:
FUTURE_LAGS_TYPE = None
LAGS_TYPE = None

class TimeSeries: ...

class RegressionModel: ...

class RegressionModelWithCategoricalCovariates: ...

class _LikelihoodMixin: ...


from conformal_tights._conformal_coherent_quantile_regressor import (
ConformalCoherentQuantileRegressor,
)
from conformal_tights._typing import FloatMatrix, FloatTensor, FloatVector

F = TypeVar("F", np.float32, np.float64)


class _DartsAdapter:
    """Expose a single quantile of a ConformalCoherentQuantileRegressor via ``predict``.

    Each adapter remembers the full set of quantiles and the one quantile it is responsible for.
    """

    def __init__(
        self, model: ConformalCoherentQuantileRegressor, quantile: float, quantiles: npt.ArrayLike
    ):
        self.model, self.quantile = model, quantile
        self.quantiles = np.asarray(quantiles)

    def predict(self, x: "pd.DataFrame", **kwargs: Any) -> FloatMatrix[F]:
        """Predict this adapter's quantile for the rows of ``x``.

        Extra keyword arguments are accepted but not used.
        """
        # Boolean mask over the quantile columns that selects the requested quantile.
        is_requested = self.quantiles == self.quantile
        # Predict all quantiles with ConformalCoherentQuantileRegressor's predict_quantiles and
        # keep only the requested column(s); the result stays two-dimensional.
        all_quantiles = np.asarray(self.model.predict_quantiles(x, quantiles=self.quantiles))
        return all_quantiles[:, is_requested]


class DartsForecaster(_LikelihoodMixin, RegressionModel):
    """Darts forecaster that produces probabilistic forecasts with a wrapped quantile regressor.

    The wrapped ConformalCoherentQuantileRegressor is handed to darts' RegressionModel, and
    quantile forecasts are produced through darts' _LikelihoodMixin with the "quantile"
    likelihood. Categorical covariates are converted to pandas categoricals before they reach the
    wrapped regressor.
    """

    def __init__(  # noqa: PLR0913
        self,
        # Regressor used by Darts to produce probabilistic forecasts.
        model: ConformalCoherentQuantileRegressor,
        *,
        # Default darts.models.RegressionModel parameters.
        lags: LAGS_TYPE | None = None,
        lags_past_covariates: LAGS_TYPE | None = None,
        lags_future_covariates: FUTURE_LAGS_TYPE | None = None,
        output_chunk_length: int = 1,
        output_chunk_shift: int = 0,
        add_encoders: dict[str, Any] | None = None,
        multi_models: bool | None = True,
        use_static_covariates: bool = True,
        # Default darts.models.RegressionModelWithCategoricalCovariates parameters.
        categorical_past_covariates: str | list[str] | None = None,
        categorical_future_covariates: str | list[str] | None = None,
        categorical_static_covariates: str | list[str] | None = None,
    ) -> None:
        """Initialize a Darts Conformal Coherent Quantile Regressor.

        Parameters
        ----------
        model
            The regressor used to produce the quantile forecasts. Its ``random_state`` seeds the
            generator used for sampling.
        lags, lags_past_covariates, lags_future_covariates, output_chunk_length, \
output_chunk_shift, add_encoders, multi_models, use_static_covariates
            Forwarded unchanged to the parent RegressionModel initialiser.
        categorical_past_covariates, categorical_future_covariates, \
categorical_static_covariates
            Names of the categorical covariates. A single string is wrapped in a list.

        Raises
        ------
        ImportError
            If darts or pandas is not installed.
        """
        # Verify that the required dependencies are installed.
        try:
            import darts  # noqa: F401
            import pandas as pd  # noqa: F401
        except ImportError:
            required_dependencies = "Please install darts and pandas to use DartsForecaster."
            raise ImportError(required_dependencies) from None
        # Initialise _LikelihoodMixin.
        self.likelihood = "quantile"
        self._model_container = self._get_model_container()
        self._rng = check_random_state(model.random_state)  # Generator for sampling.
        # Initialise darts.models.RegressionModel.
        super().__init__(
            lags=lags,
            lags_past_covariates=lags_past_covariates,
            lags_future_covariates=lags_future_covariates,
            output_chunk_length=output_chunk_length,
            output_chunk_shift=output_chunk_shift,
            add_encoders=add_encoders,
            multi_models=multi_models,
            model=model,
            use_static_covariates=use_static_covariates,
        )
        # Initialise darts.models.RegressionModelWithCategoricalCovariates.
        self.categorical_past_covariates = (
            [categorical_past_covariates]
            if isinstance(categorical_past_covariates, str)
            else categorical_past_covariates
        )
        self.categorical_future_covariates = (
            [categorical_future_covariates]
            if isinstance(categorical_future_covariates, str)
            else categorical_future_covariates
        )
        self.categorical_static_covariates = (
            [categorical_static_covariates]
            if isinstance(categorical_static_covariates, str)
            else categorical_static_covariates
        )

    def _create_lagged_data(
        self,
        target_series: Sequence[TimeSeries],
        past_covariates: Sequence[TimeSeries],
        future_covariates: Sequence[TimeSeries],
        max_samples_per_ts: int,
    ) -> tuple["pd.DataFrame", FloatVector[F]]:
        """Override training data to add support for categorical covariates.

        Returns
        -------
        tuple
            The lagged training samples as a DataFrame whose categorical columns have the
            "category" dtype, and the training labels as returned by the parent class.
        """
        # Validate categoricals with RegressionModelWithCategoricalCovariates. We cannot inherit
        # from RegressionModelWithCategoricalCovariates because it was developed with LightGBM in
        # mind and does not support other regressors like XGBRegressor.
        RegressionModelWithCategoricalCovariates._validate_categorical_covariates(  # noqa: SLF001
            self,
            target_series,
            past_covariates,
            future_covariates,
        )
        # Identify which columns in the lagged data are categorical.
        cat_col_indices, _ = RegressionModelWithCategoricalCovariates._get_categorical_features(  # noqa: SLF001
            self,
            target_series,
            past_covariates,
            future_covariates,
        )
        # Create lagged training data.
        training_samples, training_labels = super()._create_lagged_data(
            target_series, past_covariates, future_covariates, max_samples_per_ts
        )
        # Convert categorical columns to pd.Categorical so that the wrapped regressor can handle
        # them appropriately. The categories seen during training are stored per column position
        # so that the same categories can be applied at prediction time.
        self.cat_col_categories_: dict[int, "pd.Index"] = {}
        training_samples_df = pd.DataFrame(training_samples)
        cols = training_samples_df.columns
        for cat_col_index in cat_col_indices:
            cat_col = training_samples_df[cols[cat_col_index]].astype("category")
            self.cat_col_categories_[cat_col_index] = cat_col.cat.categories
            training_samples_df[cols[cat_col_index]] = cat_col
        # Store the (modified) model for filling the model container in _predict_and_sample.
        self.central_model_ = self.model
        return training_samples_df, training_labels

    def _predict_and_sample(
        self,
        x: FloatMatrix[F],
        num_samples: int,
        predict_likelihood_parameters: bool,  # noqa: FBT001
        quantiles: npt.ArrayLike = (0.025, 0.1, 0.25, 0.5, 0.75, 0.9, 0.975),
        **kwargs: Any,
    ) -> FloatMatrix[F] | FloatTensor[F]:
        """Override inference data to add support for categorical covariates.

        Parameters
        ----------
        x
            The lagged features to predict on.
        num_samples
            Forwarded to _LikelihoodMixin._predict_and_sample_likelihood.
        predict_likelihood_parameters
            Forwarded to _LikelihoodMixin._predict_and_sample_likelihood.
        quantiles
            The quantiles to predict. May be any array-like, including a NumPy array.
        **kwargs
            Forwarded to _LikelihoodMixin._predict_and_sample_likelihood.
        """
        # Instead of choosing the quantiles at initialisation time, allow users to set the quantiles
        # of DartsForecaster.predict at prediction time.
        #
        # The requested quantiles are normalised to a plain list of floats and prepared before
        # they are compared with the cached ones. Comparing the raw argument with `!=` raises a
        # ValueError for NumPy arrays because the truth value of an element-wise comparison is
        # ambiguous. NOTE(review): _prepare_quantiles is defined in darts and presumably returns a
        # sorted list, in which case a raw tuple argument would also never compare equal to it.
        requested_quantiles = np.asarray(quantiles, dtype=float).ravel().tolist()
        prepared_quantiles, median_idx = self._prepare_quantiles(requested_quantiles)
        cached_quantiles = getattr(self, "quantiles", None)
        container_is_stale = (
            cached_quantiles is None
            or list(cached_quantiles) != list(prepared_quantiles)
            # Also refresh the adapters when a different model object was stored since the
            # container was last filled, so that they never reference an outdated model.
            or getattr(self, "_container_model", None) is not self.central_model_
        )
        if container_is_stale:
            self.quantiles, self._median_idx = prepared_quantiles, median_idx
            self._container_model = self.central_model_
            self._model_container.clear()
            for quantile in self.quantiles:
                self._model_container[quantile] = _DartsAdapter(
                    self.central_model_, quantile, self.quantiles
                )
        # Convert categorical columns to pd.Categorical so that the wrapped regressor can handle
        # them appropriately.
        x_df = pd.DataFrame(x)
        for cat_col_index, cat_col_categories in self.cat_col_categories_.items():
            x_df[x_df.columns[cat_col_index]] = pd.Categorical(
                x_df[x_df.columns[cat_col_index]], categories=cat_col_categories
            )
        # Call _LikelihoodMixin._predict_and_sample_likelihood to enable probabilistic forecasting.
        outputs: FloatMatrix[F] | FloatTensor[F] = self._predict_and_sample_likelihood(
            x_df, num_samples, self.likelihood, predict_likelihood_parameters, **kwargs
        )
        return outputs

    @property
    def supports_probabilistic_prediction(self) -> bool:
        """Indicate that this is a probabilistic model for darts > 0.28."""
        return True

    @property
    def _is_probabilistic(self) -> bool:
        """Indicate that this is a probabilistic model for darts <= 0.28."""
        return True
1 change: 1 addition & 0 deletions src/conformal_tights/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@

FloatVector: TypeAlias = npt.NDArray[F]
FloatMatrix: TypeAlias = npt.NDArray[F]
FloatTensor: TypeAlias = npt.NDArray[F]
16 changes: 16 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
import pytest
import sklearn.datasets
from _pytest.fixtures import SubRequest
from lightgbm import LGBMRegressor
from sklearn.base import BaseEstimator
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

Dataset: TypeAlias = tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]

Expand All @@ -32,3 +35,16 @@ def dataset(request: SubRequest) -> Dataset:
# Split in train and test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
return X_train, X_test, y_train, y_test


# Regressors, together with their pytest ids, that parametrize the `regressor` fixture.
_REGRESSOR_PARAMS = [
    pytest.param(XGBRegressor(objective="reg:absoluteerror"), id="model:XGBRegressor-L1"),
    pytest.param(XGBRegressor(objective="reg:squarederror"), id="model:XGBRegressor-L2"),
    pytest.param(LGBMRegressor(objective="regression_l1"), id="model:LGBMRegressor-L1"),
    pytest.param(LGBMRegressor(objective="regression_l2"), id="model:LGBMRegressor-L2"),
]


@pytest.fixture(params=_REGRESSOR_PARAMS)
def regressor(request: SubRequest) -> BaseEstimator:
    """Provide one of the parametrized XGBoost or LightGBM regressors."""
    estimator: BaseEstimator = request.param
    return estimator
20 changes: 2 additions & 18 deletions tests/test_conformal_quantile_regressor.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
"""Test the Conformal Coherent Quantile Regressor."""

import numpy as np
import pytest
from _pytest.fixtures import SubRequest
from lightgbm import LGBMRegressor
from sklearn.base import BaseEstimator
from sklearn.utils.estimator_checks import check_estimator
from xgboost import XGBRegressor
Expand All @@ -12,19 +9,6 @@
from tests.conftest import Dataset


@pytest.fixture(
params=[
pytest.param(XGBRegressor(objective="reg:absoluteerror"), id="model:XGBRegressor-L1"),
pytest.param(XGBRegressor(objective="reg:squarederror"), id="model:XGBRegressor-L2"),
pytest.param(LGBMRegressor(objective="regression_l1"), id="model:LGBMRegressor-L1"),
pytest.param(LGBMRegressor(objective="regression_l2"), id="model:LGBMRegressor-L2"),
]
)
def regressor(request: SubRequest) -> BaseEstimator:
"""Return a regressor."""
return request.param


def test_conformal_quantile_regressor_coverage(dataset: Dataset, regressor: BaseEstimator) -> None:
"""Test ConformalCoherentQuantileRegressor's coverage."""
# Unpack the dataset.
Expand All @@ -34,8 +18,8 @@ def test_conformal_quantile_regressor_coverage(dataset: Dataset, regressor: Base
model.fit(X_train, y_train)
# Verify the coherence of the predicted quantiles.
ŷ_quantiles = model.predict(X_test, quantiles=np.linspace(0.1, 0.9, 3))
for i in range(ŷ_quantiles.shape[1] - 1):
assert np.all(ŷ_quantiles.iloc[:, i] <= ŷ_quantiles.iloc[:, i + 1])
for j in range(ŷ_quantiles.shape[1] - 1):
assert np.all(ŷ_quantiles.iloc[:, j] <= ŷ_quantiles.iloc[:, j + 1])
# Verify the coverage of the predicted intervals.
for desired_coverage in (0.7, 0.8, 0.9):
ŷ_interval = model.predict(X_test, coverage=desired_coverage)
Expand Down
Loading

0 comments on commit fdef314

Please sign in to comment.