Skip to content

Add regressors property to TSDataset #82

Merged
merged 10 commits into from
Oct 4, 2021
Next commit
Add regressors property, rewrite make_future to use it
d.a.bunin committed Sep 22, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit 2b53dc1be23b5af593c32fd9c69653a93fb5b160
28 changes: 19 additions & 9 deletions etna/datasets/tsdataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import math
import warnings
from collections import defaultdict
from typing import TYPE_CHECKING
from typing import Dict
from typing import Iterable
from typing import List
from typing import Optional
@@ -154,15 +156,13 @@ def make_future(self, future_steps: int) -> "TSDataset":
df = self._merge_exog(df)

# check if we have enough values in regressors
for segment in self.segments:
regressors_columns = [x for x in self.df_exog[segment].columns if x.startswith("regressor")]
if regressors_columns:
regressors_index = self.df_exog.loc[:, pd.IndexSlice[segment, regressors_columns]].index
if not np.all(future_dates.isin(regressors_index)):
warnings.warn(
f"Some regressors don't have enough values in segment {segment}, "
f"NaN-s will be used for missing values"
)
for segment, regressors_columns in self.regressors.items():
regressors_index = self.df_exog.loc[:, pd.IndexSlice[segment, regressors_columns]].index
if not np.all(future_dates.isin(regressors_index)):
warnings.warn(
f"Some regressors don't have enough values in segment {segment}, "
f"NaN-s will be used for missing values"
)

if self.transforms is not None:
for transform in self.transforms:
@@ -221,6 +221,16 @@ def segments(self) -> List[str]:
"""Get list of all segments in dataset."""
return self.df.columns.get_level_values("segment").unique().tolist()

@property
def regressors(self) -> Dict[str, List[str]]:
    """Get regressor feature names for each segment in dataset.

    A column is treated as a regressor when its feature name is a string
    that starts with ``"regressor"``.

    Returns
    -------
    Dict[str, List[str]]
        Mapping from segment name to the regressor feature names of that
        segment, in the order they appear in ``self.df.columns``. Segments
        without any regressor column are absent from the mapping.
    """
    result = defaultdict(list)
    # Every column label is unpacked as a (segment, feature) pair.
    for segment_name, feature_name in self.df.columns.values:
        # A non-string feature label cannot carry the "regressor" prefix;
        # skip it instead of failing on the missing ``startswith`` method.
        if isinstance(feature_name, str) and feature_name.startswith("regressor"):
            result[segment_name].append(feature_name)
    # Hand back a plain dict so that looking up an unknown segment raises
    # KeyError instead of silently inserting an empty list.
    return dict(result)

def plot(self, n_segments: int = 10, column: str = "target", segments: Optional[Sequence] = None):
""" Plot of random or chosen segments.

11 changes: 9 additions & 2 deletions tests/test_datasets/test_dataset.py
Original file line number Diff line number Diff line change
@@ -43,8 +43,8 @@ def df_and_regressors() -> Tuple[pd.DataFrame, pd.DataFrame]:
df = TSDataset.to_dataset(df)

timestamp = pd.date_range("2021-01-01", "2021-02-11")
df_1 = pd.DataFrame({"timestamp": timestamp, "regressor_aaa": 1, "segment": "1"})
df_2 = pd.DataFrame({"timestamp": timestamp[5:], "regressor_aaa": 2, "segment": "2"})
df_1 = pd.DataFrame({"timestamp": timestamp, "regressor_1": 1, "regressor_2": 2, "segment": "1"})
df_2 = pd.DataFrame({"timestamp": timestamp[5:], "regressor_1": 3, "regressor_2": 4, "segment": "2"})
df_exog = pd.concat([df_1, df_2], ignore_index=True)
df_exog = TSDataset.to_dataset(df_exog)

@@ -197,3 +197,10 @@ def test_getitem_all_indexes(tsdf_with_exog):
df_slice = tsdf_with_exog[:, :, :]
df_expected = tsdf_with_exog.df
pd.testing.assert_frame_equal(df_expected, df_slice)


def test_finding_regressors(df_and_regressors):
    """Check that the ``regressors`` property lists both regressors for each segment."""
    df, df_exog = df_and_regressors
    ts = TSDataset(df=df, df_exog=df_exog, freq="D")
    # Both segments of the fixture carry the same two regressor columns.
    expected = {segment: ["regressor_1", "regressor_2"] for segment in ("1", "2")}
    assert ts.regressors == expected