diff --git a/etna/transforms/missing_values/imputation.py b/etna/transforms/missing_values/imputation.py index 371e64aaf..3f1b5533b 100644 --- a/etna/transforms/missing_values/imputation.py +++ b/etna/transforms/missing_values/imputation.py @@ -1,11 +1,11 @@ from enum import Enum +from typing import Dict from typing import List from typing import Optional import numpy as np import pandas as pd -from etna.transforms.base import PerSegmentWrapper from etna.transforms.base import Transform @@ -19,8 +19,8 @@ class ImputerMode(str, Enum): seasonal = "seasonal" -class _OneSegmentTimeSeriesImputerTransform(Transform): - """One segment version of transform to fill NaNs in series of a given dataframe. +class TimeSeriesImputerTransform(Transform): + """Transform to fill NaNs in series of a given dataframe. - It is assumed that given series begins with first non NaN value. @@ -28,11 +28,22 @@ class _OneSegmentTimeSeriesImputerTransform(Transform): - This transform can't fill NaNs if all values are NaNs. In this case exception is raised. + Warning + ------- + This transform can suffer from look-ahead bias in 'mean' mode. For transforming data at some timestamp + it uses information from the whole train part. """ - def __init__(self, in_column: str, strategy: str, window: int, seasonality: int, default_value: Optional[float]): + def __init__( + self, + in_column: str = "target", + strategy: str = ImputerMode.zero.value, + window: int = -1, + seasonality: int = 1, + default_value: Optional[float] = None, + ): """ - Create instance of _OneSegmentTimeSeriesImputerTransform. + Create instance of TimeSeriesImputerTransform. Parameters ---------- @@ -69,192 +80,140 @@ def __init__(self, in_column: str, strategy: str, window: int, seasonality: int, if incorrect strategy given """ self.in_column = in_column - self.strategy = ImputerMode(strategy) + self.strategy = strategy self.window = window self.seasonality = seasonality self.default_value = default_value - self.fill_value: Optional[int] = None - self.nan_timestamps: Optional[List[pd.Timestamp]] = None + self._strategy = ImputerMode(strategy) + self._fill_value: Dict[str, int] = {} + self._nan_timestamps: Dict[str, List[pd.Timestamp]] = {} - def fit(self, df: pd.DataFrame) -> "_OneSegmentTimeSeriesImputerTransform": - """ - Fit preprocess params. + def fit(self, df: pd.DataFrame) -> "TimeSeriesImputerTransform": + """Fit params. Parameters ---------- - df: pd.DataFrame - dataframe with series to fit preprocess params with + df: + dataframe with data. Returns ------- - self: _OneSegmentTimeSeriesImputerTransform - fitted preprocess + result: TimeSeriesImputerTransform """ - raw_series = df[self.in_column] - if np.all(raw_series.isna()): + segments = sorted(set(df.columns.get_level_values("segment"))) + features = df.loc[:, pd.IndexSlice[segments, self.in_column]] + if features.isna().all().any(): raise ValueError("Series hasn't non NaN values which means it is empty and can't be filled.") - series = raw_series[raw_series.first_valid_index() :] - self.nan_timestamps = series[series.isna()].index - if self.strategy == ImputerMode.zero: - self.fill_value = 0 - elif self.strategy == ImputerMode.mean: - self.fill_value = series.mean() + + for segment in segments: + series = features.loc[:, pd.IndexSlice[segment, self.in_column]] + series = series[series.first_valid_index() :] + self._nan_timestamps[segment] = series[series.isna()].index + + if self._strategy == ImputerMode.mean: + mean_values = features.mean().to_dict() + # take only segment from multiindex key + mean_values = {key[0]: value for key, value in mean_values.items()} + self._fill_value = mean_values + return self def transform(self, df: pd.DataFrame) -> pd.DataFrame: - """ - Transform given series. + """Fill nans in the dataset. Parameters ---------- - df: pd.Dataframe - transform ``in_column`` series of given dataframe + df: + dataframe with data to transform. Returns ------- - result: pd.DataFrame - dataframe with in_column series with filled gaps + result: pd.Dataframe + transformed dataframe """ - result_df = df.copy() - cur_nans = result_df[result_df[self.in_column].isna()].index - - result_df[self.in_column] = self._fill(result_df[self.in_column]) + segments = sorted(set(df.columns.get_level_values("segment"))) - # restore nans not in self.nan_timestamps - restore_nans = cur_nans.difference(self.nan_timestamps) - result_df.loc[restore_nans, self.in_column] = np.nan + cur_nans = {} + for segment in segments: + series = df.loc[:, pd.IndexSlice[segment, self.in_column]] + cur_nans[segment] = series[series.isna()].index - return result_df + result_df = self._fill(df) - def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: - """ - Inverse transform dataframe. - - Parameters - ---------- - df: pd.Dataframe - inverse transform ``in_column`` series of given dataframe + # restore nans not in self.nan_timestamps + for segment in segments: + restore_nans = cur_nans[segment].difference(self._nan_timestamps[segment]) + result_df.loc[restore_nans, pd.IndexSlice[segment, self.in_column]] = np.nan - Returns - ------- - result: pd.DataFrame - dataframe with in_column series with initial values - """ - result_df = df.copy() - index = result_df.index.intersection(self.nan_timestamps) - result_df.loc[index, self.in_column] = np.nan return result_df - def _fill(self, df: pd.Series) -> pd.Series: - """ - Create new Series taking all previous dates and adding missing dates. + def _fill(self, df: pd.DataFrame) -> pd.DataFrame: + """Create new Series taking all previous dates and adding missing dates. Fills missed values for new dates according to ``self.strategy`` Parameters ---------- - df: pd.Series - series to fill + df: pd.DataFrame + dataframe to fill Returns ------- - result: pd.Series + result: pd.DataFrame """ - if self.nan_timestamps is None: + if len(self._nan_timestamps) == 0: raise ValueError("Trying to apply the unfitted transform! First fit the transform.") - if self.strategy == ImputerMode.zero or self.strategy == ImputerMode.mean: - df = df.fillna(value=self.fill_value) - elif self.strategy == ImputerMode.forward_fill: - df = df.fillna(method="ffill") - elif self.strategy == ImputerMode.running_mean or self.strategy == ImputerMode.seasonal: - history = self.seasonality * self.window if self.window != -1 else len(df) - timestamps = list(df.index) - for timestamp in self.nan_timestamps: - i = timestamps.index(timestamp) - indexes = np.arange(i - self.seasonality, i - self.seasonality - history, -self.seasonality) - indexes = indexes[indexes >= 0] - df.iloc[i] = np.nanmean(df.iloc[indexes]) + segments = sorted(set(df.columns.get_level_values("segment"))) + result_df = df.copy(deep=True) + + if self._strategy == ImputerMode.zero: + # we can't just do `result_df.fillna(value=0)`, it leads to errors if category dtype is present + result_df.loc[:, pd.IndexSlice[segments, self.in_column]] = result_df.loc[ + :, pd.IndexSlice[segments, self.in_column] + ].fillna(value=0) + elif self._strategy == ImputerMode.forward_fill: + result_df.fillna(method="ffill", inplace=True) + elif self._strategy == ImputerMode.mean: + for segment in segments: + result_df.loc[:, pd.IndexSlice[segment, self.in_column]].fillna( + value=self._fill_value[segment], inplace=True + ) + elif self._strategy == ImputerMode.running_mean or self._strategy == ImputerMode.seasonal: + for segment in segments: + history = self.seasonality * self.window if self.window != -1 else len(df) + timestamps = list(df.index) + for timestamp in self._nan_timestamps[segment]: + i = timestamps.index(timestamp) + indexes = np.arange(i - self.seasonality, i - self.seasonality - history, -self.seasonality) + indexes = indexes[indexes >= 0] + values = result_df.loc[result_df.index[indexes], pd.IndexSlice[segment, self.in_column]] + result_df.loc[timestamp, pd.IndexSlice[segment, self.in_column]] = np.nanmean(values) if self.default_value: - df = df.fillna(value=self.default_value) - return df - - -class TimeSeriesImputerTransform(PerSegmentWrapper): - """Transform to fill NaNs in series of a given dataframe. - - - It is assumed that given series begins with first non NaN value. - - - This transform can't fill NaNs in the future, only on train data. - - - This transform can't fill NaNs if all values are NaNs. In this case exception is raised. - - Warning - ------- - This transform can suffer from look-ahead bias in 'mean' mode. For transforming data at some timestamp - it uses information from the whole train part. - """ + result_df = result_df.fillna(value=self.default_value) + return result_df - def __init__( - self, - in_column: str = "target", - strategy: str = ImputerMode.zero, - window: int = -1, - seasonality: int = 1, - default_value: Optional[float] = None, - ): - """ - Create instance of TimeSeriesImputerTransform. + def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: + """Apply inverse transformation to the dataset. Parameters ---------- - in_column: - name of processed column - strategy: - filling value in missing timestamps: - - - If "zero", then replace missing dates with zeros + df: + dataframe with data to transform. - - If "mean", then replace missing dates using the mean in fit stage. - - - If "running_mean" then replace missing dates using mean of subset of data - - - If "forward_fill" then replace missing dates using last existing value - - - If "seasonal" then replace missing dates using seasonal moving average - - window: - In case of moving average and seasonality. - - * If ``window=-1`` all previous dates are taken in account - - * Otherwise only window previous dates - - seasonality: - the length of the seasonality - default_value: - value which will be used to impute the NaNs left after applying the imputer with the chosen strategy - - Raises - ------ - ValueError: - if incorrect strategy given + Returns + ------- + result: pd.DataFrame + transformed series """ - self.in_column = in_column - self.strategy = strategy - self.window = window - self.seasonality = seasonality - self.default_value = default_value - super().__init__( - transform=_OneSegmentTimeSeriesImputerTransform( - in_column=self.in_column, - strategy=self.strategy, - window=self.window, - seasonality=self.seasonality, - default_value=self.default_value, - ) - ) + segments = sorted(set(df.columns.get_level_values("segment"))) + result_df = df.copy() + + for segment in segments: + index = result_df.index.intersection(self._nan_timestamps[segment]) + result_df.loc[index, pd.IndexSlice[segment, self.in_column]] = np.nan + return result_df __all__ = ["TimeSeriesImputerTransform"] diff --git a/tests/test_transforms/test_missing_values/conftest.py b/tests/test_transforms/test_missing_values/conftest.py index 2251d3508..de75843d1 100644 --- a/tests/test_transforms/test_missing_values/conftest.py +++ b/tests/test_transforms/test_missing_values/conftest.py @@ -25,15 +25,16 @@ def all_date_present_df(date_range: pd.Series) -> pd.DataFrame: """Create pd.DataFrame that contains some target on given range of dates without gaps.""" df = pd.DataFrame({"timestamp": date_range}) df["target"] = list(range(len(df))) - df.set_index("timestamp", inplace=True) + df["segment"] = "segment_1" + df = TSDataset.to_dataset(df) return df @pytest.fixture -def all_date_present_df_two_segments(all_date_present_df: pd.Series) -> pd.DataFrame: +def all_date_present_df_two_segments(all_date_present_df: pd.DataFrame) -> pd.DataFrame: """Create pd.DataFrame that contains two segments with some targets on given range of dates without gaps.""" - df_1 = all_date_present_df.reset_index() - df_2 = all_date_present_df.copy().reset_index() + df_1 = TSDataset.to_flatten(all_date_present_df) + df_2 = df_1.copy() df_1["segment"] = "segment_1" df_2["segment"] = "segment_2" @@ -50,8 +51,8 @@ def df_with_missing_value_x_index(random_seed, all_date_present_df: pd.DataFrame # because Imputer should know starting and ending dates timestamps = sorted(all_date_present_df.index)[1:-1] idx = np.random.choice(timestamps) - df = all_date_present_df - df.loc[idx, "target"] = np.NaN + df = all_date_present_df.loc[:, pd.IndexSlice["segment_1", :]] + df.loc[idx, pd.IndexSlice[:, "target"]] = np.NaN return df, idx @@ -60,8 +61,8 @@ def df_with_missing_range_x_index(all_date_present_df: pd.DataFrame) -> Tuple[pd """Create pd.DataFrame that contains some target on given range of dates with range of gaps.""" timestamps = sorted(all_date_present_df.index) rng = timestamps[2:7] - df = all_date_present_df - df.loc[rng, "target"] = np.NaN + df = all_date_present_df.loc[:, pd.IndexSlice["segment_1", :]] + df.loc[rng, pd.IndexSlice[:, "target"]] = np.NaN return df, rng @@ -71,8 +72,8 @@ def df_with_missing_range_x_index_two_segments( ) -> Tuple[pd.DataFrame, list]: """Create pd.DataFrame that contains some target on given range of dates with range of gaps.""" df_one_segment, rng = df_with_missing_range_x_index - df_1 = df_one_segment.reset_index() - df_2 = df_one_segment.copy().reset_index() + df_1 = TSDataset.to_flatten(df_one_segment) + df_2 = df_1.copy() df_1["segment"] = "segment_1" df_2["segment"] = "segment_2" classic_df = pd.concat([df_1, df_2], ignore_index=True) diff --git a/tests/test_transforms/test_missing_values/test_impute_transform.py b/tests/test_transforms/test_missing_values/test_impute_transform.py index f7903ce6b..dfd94b2e4 100644 --- a/tests/test_transforms/test_missing_values/test_impute_transform.py +++ b/tests/test_transforms/test_missing_values/test_impute_transform.py @@ -7,7 +7,6 @@ from etna.datasets import TSDataset from etna.models import NaiveModel from etna.transforms.missing_values import TimeSeriesImputerTransform -from etna.transforms.missing_values.imputation import _OneSegmentTimeSeriesImputerTransform @pytest.fixture @@ -25,31 +24,12 @@ def ts_nans_beginning(example_reg_tsds): return ts -def test_wrong_init_one_segment(): - """Check that imputer for one segment fails to init with wrong imputing strategy.""" - with pytest.raises(ValueError): - _ = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="wrong_strategy", window=-1, seasonality=1, default_value=None - ) - - -def test_wrong_init_two_segments(all_date_present_df_two_segments): +def test_wrong_init(all_date_present_df_two_segments): """Check that imputer for two segments fails to fit_transform with wrong imputing strategy.""" with pytest.raises(ValueError): _ = TimeSeriesImputerTransform(strategy="wrong_strategy") -@pytest.mark.smoke -@pytest.mark.parametrize("fill_strategy", ["mean", "zero", "running_mean", "forward_fill", "seasonal"]) -def test_all_dates_present_impute(all_date_present_df: pd.DataFrame, fill_strategy: str): - """Check that imputer does nothing with series without gaps.""" - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy=fill_strategy, window=-1, seasonality=1, default_value=None - ) - result = imputer.fit_transform(all_date_present_df) - np.testing.assert_array_equal(all_date_present_df["target"], result["target"]) - - @pytest.mark.smoke @pytest.mark.parametrize("fill_strategy", ["mean", "zero", "running_mean", "forward_fill", "seasonal"]) def test_all_dates_present_impute_two_segments(all_date_present_df_two_segments: pd.DataFrame, fill_strategy: str): @@ -60,16 +40,6 @@ def test_all_dates_present_impute_two_segments(all_date_present_df_two_segments: np.testing.assert_array_equal(all_date_present_df_two_segments[segment]["target"], result[segment]["target"]) -@pytest.mark.parametrize("fill_strategy", ["zero", "mean", "running_mean", "forward_fill", "seasonal"]) -def test_all_missing_impute_fail(df_all_missing: pd.DataFrame, fill_strategy: str): - """Check that imputer can't fill nans if all values are nans.""" - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy=fill_strategy, window=-1, seasonality=1, default_value=None - ) - with pytest.raises(ValueError, match="Series hasn't non NaN values which means it is empty and can't be filled"): - _ = imputer.fit_transform(df_all_missing) - - @pytest.mark.parametrize("fill_strategy", ["mean", "running_mean", "forward_fill", "seasonal"]) def test_all_missing_impute_fail_two_segments(df_all_missing_two_segments: pd.DataFrame, fill_strategy: str): """Check that imputer can't fill nans if all values are nans.""" @@ -81,10 +51,9 @@ def test_all_missing_impute_fail_two_segments(df_all_missing_two_segments: pd.Da def test_one_missing_value_zero(df_with_missing_value_x_index: pd.DataFrame): """Check that imputer with zero-strategy works correctly in case of one missing value in data.""" df, idx = df_with_missing_value_x_index - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="zero", window=-1, seasonality=1, default_value=None - ) - result = imputer.fit_transform(df)["target"] + segment = df.columns.get_level_values("segment")[0] + imputer = TimeSeriesImputerTransform(in_column="target", strategy="zero") + result = imputer.fit_transform(df).loc[:, pd.IndexSlice[segment, "target"]] assert result.loc[idx] == 0 assert not result.isna().any() @@ -92,10 +61,9 @@ def test_one_missing_value_zero(df_with_missing_value_x_index: pd.DataFrame): def test_range_missing_zero(df_with_missing_range_x_index: pd.DataFrame): """Check that imputer with zero-strategy works correctly in case of range of missing values in data.""" df, rng = df_with_missing_range_x_index - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="zero", window=-1, seasonality=1, default_value=None - ) - result = imputer.fit_transform(df)["target"] + segment = df.columns.get_level_values("segment")[0] + imputer = TimeSeriesImputerTransform(in_column="target", strategy="zero") + result = imputer.fit_transform(df).loc[:, pd.IndexSlice[segment, "target"]] expected_series = pd.Series(index=rng, data=[0 for _ in rng], name="target") np.testing.assert_array_almost_equal(result.loc[rng].reset_index(drop=True), expected_series) assert not result.isna().any() @@ -104,11 +72,10 @@ def test_range_missing_zero(df_with_missing_range_x_index: pd.DataFrame): def test_one_missing_value_mean(df_with_missing_value_x_index: pd.DataFrame): """Check that imputer with mean-strategy works correctly in case of one missing value in data.""" df, idx = df_with_missing_value_x_index - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="mean", window=-1, seasonality=1, default_value=None - ) - expected_value = df["target"].mean() - result = imputer.fit_transform(df)["target"] + segment = df.columns.get_level_values("segment")[0] + imputer = TimeSeriesImputerTransform(in_column="target", strategy="mean") + expected_value = df.loc[:, pd.IndexSlice[segment, "target"]].mean() + result = imputer.fit_transform(df).loc[:, pd.IndexSlice[segment, "target"]] assert result.loc[idx] == expected_value assert not result.isna().any() @@ -116,11 +83,10 @@ def test_one_missing_value_mean(df_with_missing_value_x_index: pd.DataFrame): def test_range_missing_mean(df_with_missing_range_x_index): """Check that imputer with mean-strategy works correctly in case of range of missing values in data.""" df, rng = df_with_missing_range_x_index - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="mean", window=-1, seasonality=1, default_value=None - ) - result = imputer.fit_transform(df)["target"] - expected_value = df["target"].mean() + segment = df.columns.get_level_values("segment")[0] + imputer = TimeSeriesImputerTransform(in_column="target", strategy="mean") + result = imputer.fit_transform(df).loc[:, pd.IndexSlice[segment, "target"]] + expected_value = df.loc[:, pd.IndexSlice[segment, "target"]].mean() expected_series = pd.Series(index=rng, data=[expected_value for _ in rng], name="target") np.testing.assert_array_almost_equal(result.loc[rng].reset_index(drop=True), expected_series) assert not result.isna().any() @@ -129,14 +95,13 @@ def test_range_missing_mean(df_with_missing_range_x_index): def test_one_missing_value_forward_fill(df_with_missing_value_x_index): """Check that imputer with forward-fill-strategy works correctly in case of one missing value in data.""" df, idx = df_with_missing_value_x_index - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="forward_fill", window=-1, seasonality=1, default_value=None - ) - result = imputer.fit_transform(df)["target"] + segment = df.columns.get_level_values("segment")[0] + imputer = TimeSeriesImputerTransform(in_column="target", strategy="forward_fill") + result = imputer.fit_transform(df).loc[:, pd.IndexSlice[segment, "target"]] timestamps = np.array(sorted(df.index)) timestamp_idx = np.where(timestamps == idx)[0][0] - expected_value = df.loc[timestamps[timestamp_idx - 1], "target"] + expected_value = df.loc[timestamps[timestamp_idx - 1], pd.IndexSlice[segment, "target"]] assert result.loc[idx] == expected_value assert not result.isna().any() @@ -144,15 +109,14 @@ def test_one_missing_value_forward_fill(df_with_missing_value_x_index): def test_range_missing_forward_fill(df_with_missing_range_x_index: pd.DataFrame): """Check that imputer with forward-fill-strategy works correctly in case of range of missing values in data.""" df, rng = df_with_missing_range_x_index - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="forward_fill", window=-1, seasonality=1, default_value=None - ) - result = imputer.fit_transform(df)["target"] + segment = df.columns.get_level_values("segment")[0] + imputer = TimeSeriesImputerTransform(in_column="target", strategy="forward_fill") + result = imputer.fit_transform(df).loc[:, pd.IndexSlice[segment, "target"]] timestamps = np.array(sorted(df.index)) rng = [pd.Timestamp(x) for x in rng] timestamp_idx = min(np.where([x in rng for x in timestamps])[0]) - expected_value = df.loc[timestamps[timestamp_idx - 1], "target"] + expected_value = df.loc[timestamps[timestamp_idx - 1], pd.IndexSlice[segment, "target"]] expected_series = pd.Series(index=rng, data=[expected_value for _ in rng], name="target") np.testing.assert_array_almost_equal(result.loc[rng], expected_series) assert not result.isna().any() @@ -162,16 +126,17 @@ def test_range_missing_forward_fill(df_with_missing_range_x_index: pd.DataFrame) def test_one_missing_value_running_mean(df_with_missing_value_x_index: pd.DataFrame, window: int): """Check that imputer with running-mean-strategy works correctly in case of one missing value in data.""" df, idx = df_with_missing_value_x_index + segment = df.columns.get_level_values("segment")[0] timestamps = np.array(sorted(df.index)) timestamp_idx = np.where(timestamps == idx)[0][0] - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="running_mean", window=window, seasonality=1, default_value=None - ) + imputer = TimeSeriesImputerTransform(in_column="target", strategy="running_mean", window=window) if window == -1: - expected_value = df.loc[: timestamps[timestamp_idx - 1], "target"].mean() + expected_value = df.loc[: timestamps[timestamp_idx - 1], pd.IndexSlice[segment, "target"]].mean() else: - expected_value = df.loc[timestamps[timestamp_idx - window] : timestamps[timestamp_idx - 1], "target"].mean() - result = imputer.fit_transform(df)["target"] + expected_value = df.loc[ + timestamps[timestamp_idx - window] : timestamps[timestamp_idx - 1], pd.IndexSlice[segment, "target"] + ].mean() + result = imputer.fit_transform(df).loc[:, pd.IndexSlice[segment, "target"]] assert result.loc[idx] == expected_value assert not result.isna().any() @@ -180,12 +145,11 @@ def test_one_missing_value_running_mean(df_with_missing_value_x_index: pd.DataFr def test_range_missing_running_mean(df_with_missing_range_x_index: pd.DataFrame, window: int): """Check that imputer with running-mean-strategy works correctly in case of range of missing values in data.""" df, rng = df_with_missing_range_x_index + segment = df.columns.get_level_values("segment")[0] timestamps = np.array(sorted(df.index)) timestamp_idxs = np.where([x in rng for x in timestamps])[0] - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy="running_mean", window=window, seasonality=1, default_value=None - ) - result = imputer.fit_transform(df)["target"] + imputer = TimeSeriesImputerTransform(in_column="target", strategy="running_mean", window=window) + result = imputer.fit_transform(df).loc[:, pd.IndexSlice[segment, "target"]] assert not result.isna().any() for idx in timestamp_idxs: @@ -288,18 +252,6 @@ def test_default_value(ts_to_fill, window: int, seasonality: int, default_value: np.testing.assert_array_equal(result, expected) -@pytest.mark.parametrize("fill_strategy", ["mean", "zero", "running_mean", "forward_fill", "seasonal"]) -def test_inverse_transform_one_segment(df_with_missing_range_x_index: pd.DataFrame, fill_strategy: str): - """Check that transform + inverse_transform don't change original df for one segment.""" - df, rng = df_with_missing_range_x_index - imputer = _OneSegmentTimeSeriesImputerTransform( - in_column="target", strategy=fill_strategy, window=-1, seasonality=1, default_value=None - ) - transform_result = imputer.fit_transform(df) - inverse_transform_result = imputer.inverse_transform(transform_result) - np.testing.assert_array_equal(df, inverse_transform_result) - - @pytest.mark.parametrize("fill_strategy", ["mean", "zero", "running_mean", "forward_fill", "seasonal"]) def test_inverse_transform_many_segments(df_with_missing_range_x_index_two_segments: pd.DataFrame, fill_strategy: str): """Check that transform + inverse_transform don't change original df for two segments."""