diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index db56b8336..6a5a2cff5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -107,3 +107,42 @@ jobs: - name: Upload coverage uses: codecov/codecov-action@v2 + + test-pandas-versions: + runs-on: ubuntu-latest + strategy: + matrix: + pandas-version: + - ">=1.1,<1.2" + - ">=1.2,<1.3" + - ">=1.3,<1.4" + - ">=1.4" + fail-fast: false + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python + id: setup-python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + virtualenvs-create: true + virtualenvs-in-project: true + + - name: Install dependencies + run: | + poetry install -E "all tests" -vv + poetry run pip install "pandas${{ matrix.pandas-version }}" + + - name: PyTest ("tsdataset transforms") + run: | + poetry run pytest tests/test_datasets -v --cov=etna --cov-report=xml + poetry run pytest tests/test_transforms -v --cov=etna --cov-report=xml + + - name: Upload coverage + uses: codecov/codecov-action@v2 diff --git a/CHANGELOG.md b/CHANGELOG.md index 720be2eda..38f94105b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - - -- +- Make slicing faster in `TSDataset._merge_exog`, `FilterFeaturesTransform`, `AddConstTransform`, `LambdaTransform`, `LagTransform`, `LogTransform`, `SklearnTransform`, `WindowStatisticsTransform`; make CICD test different pandas versions ([#900](https://github.com/tinkoff-ai/etna/pull/900)) - - ### Fixed diff --git a/etna/datasets/__init__.py b/etna/datasets/__init__.py index eead6e2a3..9ef938d77 100644 --- a/etna/datasets/__init__.py +++ b/etna/datasets/__init__.py @@ -4,3 +4,4 @@ from etna.datasets.datasets_generation import generate_periodic_df from etna.datasets.tsdataset import TSDataset from etna.datasets.utils import duplicate_data +from etna.datasets.utils import
set_columns_wide diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py index 621a8d81d..74f15336e 100644 --- a/etna/datasets/tsdataset.py +++ b/etna/datasets/tsdataset.py @@ -406,8 +406,7 @@ def _check_regressors(df: pd.DataFrame, df_regressors: pd.DataFrame): def _merge_exog(self, df: pd.DataFrame) -> pd.DataFrame: if self.df_exog is None: raise ValueError("Something went wrong, Trying to merge df_exog which is None!") - segments = sorted(set(df.columns.get_level_values("segment"))) - df_regressors = self.df_exog.loc[:, pd.IndexSlice[segments, self.known_future]] + df_regressors = self.df_exog.loc[:, pd.IndexSlice[:, self.known_future]] self._check_regressors(df=df, df_regressors=df_regressors) df = pd.concat((df, self.df_exog), axis=1).loc[df.index].sort_index(axis=1, level=(0, 1)) return df diff --git a/etna/datasets/utils.py b/etna/datasets/utils.py index d48e81b32..0beef91ba 100644 --- a/etna/datasets/utils.py +++ b/etna/datasets/utils.py @@ -1,5 +1,6 @@ from enum import Enum from typing import List +from typing import Optional from typing import Sequence import pandas as pd @@ -120,3 +121,57 @@ def __getitem__(self, index): def __len__(self): return len(self.ts_samples) + + +def set_columns_wide( + df_left: pd.DataFrame, + df_right: pd.DataFrame, + timestamps_left: Optional[Sequence[pd.Timestamp]] = None, + timestamps_right: Optional[Sequence[pd.Timestamp]] = None, + segments_left: Optional[Sequence[str]] = None, + features_right: Optional[Sequence[str]] = None, + features_left: Optional[Sequence[str]] = None, + segments_right: Optional[Sequence[str]] = None, +) -> pd.DataFrame: + """Set columns in a left dataframe with values from the right dataframe.
+ + Parameters + ---------- + df_left: + dataframe to set columns in; it is not modified, the values are set in its column-sorted copy + df_right: + dataframe to take values from; values are assigned by position, so both selections are expected to match in shape + timestamps_left: + timestamps to select in ``df_left``, all timestamps if ``None`` + timestamps_right: + timestamps to select in ``df_right``, all timestamps if ``None`` + segments_left: + segments to select in ``df_left``, all segments if ``None`` + segments_right: + segments to select in ``df_right``, all segments if ``None`` + features_left: + features to select in ``df_left``, all features if ``None`` + features_right: + features to select in ``df_right``, all features if ``None`` + + Returns + ------- + : + a new dataframe with changed values, its columns are sorted + """ + # sort columns so that selections from both dataframes come in the same column order + df_left = df_left.sort_index(axis=1) + df_right = df_right.sort_index(axis=1) + + # prepare indexing: ``None`` is replaced with a full slice over the corresponding axis + timestamps_left_index = slice(None) if timestamps_left is None else timestamps_left + timestamps_right_index = slice(None) if timestamps_right is None else timestamps_right + segments_left_index = slice(None) if segments_left is None else segments_left + segments_right_index = slice(None) if segments_right is None else segments_right + features_left_index = slice(None) if features_left is None else features_left + features_right_index = slice(None) if features_right is None else features_right + + right_value = df_right.loc[timestamps_right_index, (segments_right_index, features_right_index)] + df_left.loc[timestamps_left_index, (segments_left_index, features_left_index)] = right_value.values + + return df_left diff --git a/etna/transforms/feature_selection/filter.py b/etna/transforms/feature_selection/filter.py index 3c5614b53..904ccc8a8 100644 --- a/etna/transforms/feature_selection/filter.py +++ b/etna/transforms/feature_selection/filter.py @@ -73,14 +73,15 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: if self.include is not None: if not set(self.include).issubset(features): raise ValueError(f"Features {set(self.include) - set(features)} are not present in the dataset.") - segments = sorted(set(df.columns.get_level_values("segment"))) - result = result.loc[:, pd.IndexSlice[segments, self.include]] + result = result.loc[:,
pd.IndexSlice[:, self.include]] if self.exclude is not None and self.exclude: if not set(self.exclude).issubset(features): raise ValueError(f"Features {set(self.exclude) - set(features)} are not present in the dataset.") result = result.drop(columns=self.exclude, level="feature") if self.return_features: self._df_removed = df.drop(result.columns, axis=1) + + result = result.sort_index(axis=1) return result def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: diff --git a/etna/transforms/math/add_constant.py b/etna/transforms/math/add_constant.py index d7060fea9..d373fba26 100644 --- a/etna/transforms/math/add_constant.py +++ b/etna/transforms/math/add_constant.py @@ -3,6 +3,7 @@ import pandas as pd +from etna.datasets import set_columns_wide from etna.transforms.base import Transform from etna.transforms.utils import match_target_quantiles @@ -75,10 +76,12 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: segments = sorted(set(df.columns.get_level_values("segment"))) result = df.copy() - features = df.loc[:, pd.IndexSlice[segments, self.in_column]] + features = df.loc[:, pd.IndexSlice[:, self.in_column]] transformed_features = features + self.value if self.inplace: - result.loc[:, pd.IndexSlice[segments, self.in_column]] = transformed_features + result = set_columns_wide( + result, transformed_features, features_left=[self.in_column], features_right=[self.in_column] + ) else: column_name = self._get_column_name() transformed_features.columns = pd.MultiIndex.from_product([segments, [column_name]]) @@ -101,17 +104,23 @@ def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: """ result = df.copy() if self.inplace: - segments = sorted(set(df.columns.get_level_values("segment"))) - features = df.loc[:, pd.IndexSlice[segments, self.in_column]] + features = df.loc[:, pd.IndexSlice[:, self.in_column]] transformed_features = features - self.value - result.loc[:, pd.IndexSlice[segments, self.in_column]] = transformed_features + result = 
set_columns_wide( + result, transformed_features, features_left=[self.in_column], features_right=[self.in_column] + ) if self.in_column == "target": segment_columns = result.columns.get_level_values("feature").tolist() quantiles = match_target_quantiles(set(segment_columns)) for quantile_column_nm in quantiles: - features = df.loc[:, pd.IndexSlice[segments, quantile_column_nm]] + features = df.loc[:, pd.IndexSlice[:, quantile_column_nm]] transformed_features = features - self.value - result.loc[:, pd.IndexSlice[segments, quantile_column_nm]] = transformed_features + result = set_columns_wide( + result, + transformed_features, + features_left=[quantile_column_nm], + features_right=[quantile_column_nm], + ) return result diff --git a/etna/transforms/math/apply_lambda.py b/etna/transforms/math/apply_lambda.py index 67e116fd9..c93fc7082 100644 --- a/etna/transforms/math/apply_lambda.py +++ b/etna/transforms/math/apply_lambda.py @@ -4,6 +4,7 @@ import pandas as pd +from etna.datasets import set_columns_wide from etna.transforms.base import Transform from etna.transforms.utils import match_target_quantiles @@ -94,10 +95,12 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: """ result = df.copy() segments = sorted(set(df.columns.get_level_values("segment"))) - features = df.loc[:, pd.IndexSlice[segments, self.in_column]] + features = df.loc[:, pd.IndexSlice[:, self.in_column]].sort_index(axis=1) transformed_features = self.transform_func(features) if self.inplace: - result.loc[:, pd.IndexSlice[segments, self.in_column]] = transformed_features + result = set_columns_wide( + result, transformed_features, features_left=[self.in_column], features_right=[self.in_column] + ) else: transformed_features.columns = pd.MultiIndex.from_product([segments, [self.change_column]]) result = pd.concat([result] + [transformed_features], axis=1) @@ -119,15 +122,21 @@ def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: """ result_df = df.copy() if 
self.inverse_transform_func: - segments = sorted(set(df.columns.get_level_values("segment"))) - features = df.loc[:, pd.IndexSlice[segments, self.in_column]] + features = df.loc[:, pd.IndexSlice[:, self.in_column]].sort_index(axis=1) transformed_features = self.inverse_transform_func(features) - result_df.loc[:, pd.IndexSlice[segments, self.in_column]] = transformed_features + result_df = set_columns_wide( + result_df, transformed_features, features_left=[self.in_column], features_right=[self.in_column] + ) if self.in_column == "target": segment_columns = result_df.columns.get_level_values("feature").tolist() quantiles = match_target_quantiles(set(segment_columns)) for quantile_column_nm in quantiles: - features = df.loc[:, pd.IndexSlice[segments, quantile_column_nm]] + features = df.loc[:, pd.IndexSlice[:, quantile_column_nm]].sort_index(axis=1) transformed_features = self.inverse_transform_func(features) - result_df.loc[:, pd.IndexSlice[segments, quantile_column_nm]] = transformed_features + result_df = set_columns_wide( + result_df, + transformed_features, + features_left=[quantile_column_nm], + features_right=[quantile_column_nm], + ) return result_df diff --git a/etna/transforms/math/lags.py b/etna/transforms/math/lags.py index 4d8e5b35b..c8b4101a0 100644 --- a/etna/transforms/math/lags.py +++ b/etna/transforms/math/lags.py @@ -82,7 +82,7 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: result = df.copy() segments = sorted(set(df.columns.get_level_values("segment"))) all_transformed_features = [] - features = df.loc[:, pd.IndexSlice[segments, self.in_column]] + features = df.loc[:, pd.IndexSlice[:, self.in_column]] for lag in self.lags: column_name = self._get_column_name(lag) transformed_features = features.shift(lag) diff --git a/etna/transforms/math/log.py b/etna/transforms/math/log.py index d495cfef0..9c53da260 100644 --- a/etna/transforms/math/log.py +++ b/etna/transforms/math/log.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd +from 
etna.datasets import set_columns_wide from etna.transforms.base import Transform from etna.transforms.utils import match_target_quantiles @@ -72,14 +73,16 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: transformed dataframe """ segments = sorted(set(df.columns.get_level_values("segment"))) - features = df.loc[:, pd.IndexSlice[segments, self.in_column]] + features = df.loc[:, pd.IndexSlice[:, self.in_column]] if (features < 0).any().any(): raise ValueError("LogPreprocess can be applied only to non-negative series") result = df.copy() transformed_features = np.log1p(features) / np.log(self.base) if self.inplace: - result.loc[:, pd.IndexSlice[segments, self.in_column]] = transformed_features + result = set_columns_wide( + result, transformed_features, features_left=[self.in_column], features_right=[self.in_column] + ) else: column_name = self._get_column_name() transformed_features.columns = pd.MultiIndex.from_product([segments, [column_name]]) @@ -102,17 +105,23 @@ def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: """ result = df.copy() if self.inplace: - segments = sorted(set(df.columns.get_level_values("segment"))) - features = df.loc[:, pd.IndexSlice[segments, self.in_column]] + features = df.loc[:, pd.IndexSlice[:, self.in_column]] transformed_features = np.expm1(features * np.log(self.base)) - result.loc[:, pd.IndexSlice[segments, self.in_column]] = transformed_features + result = set_columns_wide( + result, transformed_features, features_left=[self.in_column], features_right=[self.in_column] + ) if self.in_column == "target": segment_columns = result.columns.get_level_values("feature").tolist() quantiles = match_target_quantiles(set(segment_columns)) for quantile_column_nm in quantiles: - features = df.loc[:, pd.IndexSlice[segments, quantile_column_nm]] + features = df.loc[:, pd.IndexSlice[:, quantile_column_nm]] transformed_features = np.expm1(features * np.log(self.base)) - result.loc[:, pd.IndexSlice[segments, quantile_column_nm]] = 
transformed_features + result = set_columns_wide( + result, + transformed_features, + features_left=[quantile_column_nm], + features_right=[quantile_column_nm], + ) return result diff --git a/etna/transforms/math/sklearn.py b/etna/transforms/math/sklearn.py index d0dc9c256..bd32db9d6 100644 --- a/etna/transforms/math/sklearn.py +++ b/etna/transforms/math/sklearn.py @@ -10,6 +10,7 @@ from sklearn.base import TransformerMixin from etna.core import StringEnumWithRepr +from etna.datasets import set_columns_wide from etna.transforms.base import Transform from etna.transforms.utils import match_target_quantiles @@ -93,7 +94,7 @@ def fit(self, df: pd.DataFrame) -> "SklearnTransform": ------- : """ - segments = sorted(set(df.columns.get_level_values("segment"))) + df = df.sort_index(axis=1) if self.in_column is None: self.in_column = sorted(set(df.columns.get_level_values("feature"))) @@ -104,7 +105,7 @@ def fit(self, df: pd.DataFrame) -> "SklearnTransform": self.out_columns = [self._get_column_name(column) for column in self.in_column] if self.mode == TransformMode.per_segment: - x = df.loc[:, (segments, self.in_column)].values + x = df.loc[:, pd.IndexSlice[:, self.in_column]].values elif self.mode == TransformMode.macro: x = self._reshape(df) else: @@ -127,9 +128,11 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: : transformed DataFrame. 
""" + df = df.sort_index(axis=1) segments = sorted(set(df.columns.get_level_values("segment"))) + if self.mode == TransformMode.per_segment: - x = df.loc[:, (segments, self.in_column)].values + x = df.loc[:, pd.IndexSlice[:, self.in_column]].values transformed = self.transformer.transform(X=x) elif self.mode == TransformMode.macro: @@ -139,11 +142,11 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: else: raise ValueError(f"'{self.mode}' is not a valid TransformMode.") if self.inplace: - df.loc[:, (segments, self.in_column)] = transformed + df.loc[:, pd.IndexSlice[:, self.in_column]] = transformed else: transformed_features = pd.DataFrame( - transformed, columns=df.loc[:, (segments, self.in_column)].columns, index=df.index - ) + transformed, columns=df.loc[:, pd.IndexSlice[:, self.in_column]].columns, index=df.index + ).sort_index(axis=1) transformed_features.columns = pd.MultiIndex.from_product([segments, self.out_columns]) df = pd.concat((df, transformed_features), axis=1) df = df.sort_index(axis=1) @@ -164,7 +167,7 @@ def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: : transformed DataFrame. 
""" - segments = sorted(set(df.columns.get_level_values("segment"))) + df = df.sort_index(axis=1) if self.in_column is None: raise ValueError("Transform is not fitted yet.") @@ -177,17 +180,18 @@ def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: quantiles_arrays: Dict[str, pd.DataFrame] = dict() if self.mode == TransformMode.per_segment: - x = df.loc[:, (segments, self.in_column)].values + x = df.loc[:, pd.IndexSlice[:, self.in_column]].values transformed = self.transformer.inverse_transform(X=x) # quantiles inverse transformation for quantile_column_nm in quantiles: - df_slice_copy = df.loc[:, (segments, self.in_column)].copy() - df_slice_copy.loc[:, (segments, "target")] = df.loc[:, (segments, quantile_column_nm)].values - df_slice_copy.loc[:, (segments, self.in_column)] = self.transformer.inverse_transform( - X=df_slice_copy + df_slice_copy = df.loc[:, pd.IndexSlice[:, self.in_column]].copy() + df_slice_copy = set_columns_wide( + df_slice_copy, df, features_left=["target"], features_right=[quantile_column_nm] ) - quantiles_arrays[quantile_column_nm] = df_slice_copy.loc[:, (segments, "target")].rename( + transformed_quantile = self.transformer.inverse_transform(X=df_slice_copy) + df_slice_copy.loc[:, pd.IndexSlice[:, self.in_column]] = transformed_quantile + quantiles_arrays[quantile_column_nm] = df_slice_copy.loc[:, pd.IndexSlice[:, "target"]].rename( columns={"target": quantile_column_nm} ) @@ -198,28 +202,29 @@ def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: # quantiles inverse transformation for quantile_column_nm in quantiles: - df_slice_copy = df.loc[:, (segments, self.in_column)].copy() - df_slice_copy.loc[:, (segments, "target")] = df.loc[:, (segments, quantile_column_nm)].values - df_slice_copy_reshaped_array = self._reshape(df_slice_copy) - transformed_ = self.transformer.inverse_transform(X=df_slice_copy_reshaped_array) - df_slice_copy.loc[:, (segments, self.in_column)] = self._inverse_reshape( - df_slice_copy, transformed_ 
+ df_slice_copy = df.loc[:, pd.IndexSlice[:, self.in_column]].copy() + df_slice_copy = set_columns_wide( + df_slice_copy, df, features_left=["target"], features_right=[quantile_column_nm] ) - quantiles_arrays[quantile_column_nm] = df_slice_copy.loc[:, (segments, "target")].rename( + df_slice_copy_reshaped_array = self._reshape(df_slice_copy) + transformed_quantile = self.transformer.inverse_transform(X=df_slice_copy_reshaped_array) + inverse_reshaped_quantile = self._inverse_reshape(df_slice_copy, transformed_quantile) + df_slice_copy.loc[:, pd.IndexSlice[:, self.in_column]] = inverse_reshaped_quantile + quantiles_arrays[quantile_column_nm] = df_slice_copy.loc[:, pd.IndexSlice[:, "target"]].rename( columns={"target": quantile_column_nm} ) else: raise ValueError(f"'{self.mode}' is not a valid TransformMode.") - df.loc[:, (segments, self.in_column)] = transformed + df.loc[:, pd.IndexSlice[:, self.in_column]] = transformed for quantile_column_nm in quantiles: - df.loc[:, (segments, quantile_column_nm)] = quantiles_arrays[quantile_column_nm].values + df.loc[:, pd.IndexSlice[:, quantile_column_nm]] = quantiles_arrays[quantile_column_nm].values return df def _reshape(self, df: pd.DataFrame) -> np.ndarray: segments = sorted(set(df.columns.get_level_values("segment"))) - x = df.loc[:, (segments, self.in_column)] + x = df.loc[:, pd.IndexSlice[:, self.in_column]] x = pd.concat([x[segment] for segment in segments]).values return x diff --git a/etna/transforms/math/statistics.py b/etna/transforms/math/statistics.py index 116c2b443..267bebbf0 100644 --- a/etna/transforms/math/statistics.py +++ b/etna/transforms/math/statistics.py @@ -73,7 +73,8 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame: history = self.seasonality * self.window if self.window != -1 else len(df) segments = sorted(df.columns.get_level_values("segment").unique()) - x = df.loc[pd.IndexSlice[:], pd.IndexSlice[segments, self.in_column]].values[::-1] + df_slice = df.loc[:, pd.IndexSlice[:, 
self.in_column]].sort_index(axis=1) + x = df_slice.values[::-1] # Addend NaNs to obtain a window of length "history" for each point x = np.append(x, np.empty((history - 1, x.shape[1])) * np.nan, axis=0) diff --git a/tests/test_datasets/test_utils.py b/tests/test_datasets/test_utils.py index a409da7f3..85fb5df10 100644 --- a/tests/test_datasets/test_utils.py +++ b/tests/test_datasets/test_utils.py @@ -2,8 +2,11 @@ import pandas as pd import pytest +from etna.datasets import TSDataset from etna.datasets import duplicate_data +from etna.datasets import generate_ar_df from etna.datasets.utils import _TorchDataset +from etna.datasets.utils import set_columns_wide @pytest.fixture @@ -67,3 +70,107 @@ def test_torch_dataset(): assert torch_dataset[0] == ts_samples[0] assert len(torch_dataset) == 1 + + +def _get_df_wide(random_seed: int) -> pd.DataFrame: + df = generate_ar_df(periods=5, start_time="2020-01-01", n_segments=3, random_seed=random_seed) + df_wide = TSDataset.to_dataset(df) + + df_exog = df.copy() + df_exog = df_exog.rename(columns={"target": "exog_0"}) + df_exog["exog_0"] = df_exog["exog_0"] + 1 + df_exog["exog_1"] = df_exog["exog_0"] + 1 + df_exog["exog_2"] = df_exog["exog_1"] + 1 + df_exog_wide = TSDataset.to_dataset(df_exog) + + ts = TSDataset(df=df_wide, df_exog=df_exog_wide, freq="D") + df = ts.df + + # make some reorderings for checking corner cases + df = df.loc[:, pd.IndexSlice[["segment_2", "segment_0", "segment_1"], ["target", "exog_2", "exog_1", "exog_0"]]] + + return df + + +@pytest.fixture +def df_left() -> pd.DataFrame: + return _get_df_wide(0) + + +@pytest.fixture +def df_right() -> pd.DataFrame: + return _get_df_wide(1) + + +@pytest.mark.parametrize( + "features_left, features_right", + [ + (None, None), + (["exog_0"], ["exog_0"]), + (["exog_0", "exog_1"], ["exog_0", "exog_1"]), + (["exog_0", "exog_1"], ["exog_1", "exog_2"]), + ], +) +@pytest.mark.parametrize( + "segments_left, segment_right", + [ + (None, None), + (["segment_0"],
["segment_0"]), + (["segment_0", "segment_1"], ["segment_0", "segment_1"]), + (["segment_0", "segment_1"], ["segment_1", "segment_2"]), + ], +) +@pytest.mark.parametrize( + "timestamps_idx_left, timestamps_idx_right", [(None, None), ([0], [0]), ([1, 2], [1, 2]), ([1, 2], [3, 4])] +) +def test_set_columns_wide( + timestamps_idx_left, + timestamps_idx_right, + segments_left, + segment_right, + features_left, + features_right, + df_left, + df_right, +): + timestamps_left = None if timestamps_idx_left is None else df_left.index[timestamps_idx_left] + timestamps_right = None if timestamps_idx_right is None else df_right.index[timestamps_idx_right] + + df_obtained = set_columns_wide( + df_left, + df_right, + timestamps_left=timestamps_left, + timestamps_right=timestamps_right, + segments_left=segments_left, + segments_right=segment_right, + features_left=features_left, + features_right=features_right, + ) + + # get expected result: replay the same assignment with explicit selections, each side resolved from its own frame + df_expected = df_left.copy() + + timestamps_left_full = df_left.index.tolist() if timestamps_left is None else timestamps_left + timestamps_right_full = df_right.index.tolist() if timestamps_right is None else timestamps_right + + segments_left_full = ( + df_left.columns.get_level_values("segment").unique().tolist() if segments_left is None else segments_left + ) + segments_right_full = ( + df_right.columns.get_level_values("segment").unique().tolist() if segment_right is None else segment_right + ) + + features_left_full = ( + df_left.columns.get_level_values("feature").unique().tolist() if features_left is None else features_left + ) + features_right_full = ( + df_right.columns.get_level_values("feature").unique().tolist() if features_right is None else features_right + ) + + right_value = df_right.loc[timestamps_right_full, pd.IndexSlice[segments_right_full, features_right_full]] + df_expected.loc[timestamps_left_full, pd.IndexSlice[segments_left_full, features_left_full]] = right_value.values + + df_expected = df_expected.sort_index(axis=1)
+ + # compare values + pd.testing.assert_frame_equal(df_obtained, df_expected)