From 1993d6a5a6402231d8e7492b01a92353c40cbc54 Mon Sep 17 00:00:00 2001 From: Yakov Malyshev Date: Mon, 5 Jun 2023 17:27:45 +0300 Subject: [PATCH 1/4] Change `make_samples` of `MLPNet` and `RNNNet` --- etna/models/nn/mlp.py | 40 ++++++++++++++++++++++++++------------ etna/models/nn/rnn.py | 45 +++++++++++++++++++++++++------------------ 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/etna/models/nn/mlp.py b/etna/models/nn/mlp.py index 034b1b252..5c1ff81cc 100644 --- a/etna/models/nn/mlp.py +++ b/etna/models/nn/mlp.py @@ -109,8 +109,22 @@ def step(self, batch: MLPBatch, *args, **kwargs): # type: ignore def make_samples(self, df: pd.DataFrame, encoder_length: int, decoder_length: int) -> Iterable[dict]: """Make samples from segment DataFrame.""" + values_real = ( + df.select_dtypes(include=[np.number]) + .pipe(lambda x: x[[i for i in x.columns if i != "target"]]) + .values + ) + values_target = df["target"].values + segment = df["segment"].values[0] + + def _make( + values_target: np.ndarray, + values_real: np.ndarray, + segment: str, + start_idx: int, + decoder_length: int + ) -> Optional[dict]: - def _make(df: pd.DataFrame, start_idx: int, decoder_length: int) -> Optional[dict]: sample: Dict[str, Any] = {"decoder_real": list(), "decoder_target": list(), "segment": None} total_length = len(df["target"]) total_sample_length = decoder_length @@ -118,21 +132,17 @@ def _make(df: pd.DataFrame, start_idx: int, decoder_length: int) -> Optional[dic if total_sample_length + start_idx > total_length: return None - sample["decoder_real"] = ( - df.select_dtypes(include=[np.number]) - .pipe(lambda x: x[[i for i in x.columns if i != "target"]]) - .values[start_idx : start_idx + decoder_length] - ) - - target = df["target"].values[start_idx : start_idx + decoder_length].reshape(-1, 1) - sample["decoder_target"] = target - sample["segment"] = df["segment"].values[0] + sample["decoder_real"] = values_real[start_idx : start_idx + decoder_length] + sample["decoder_target"] = values_target[start_idx : start_idx + decoder_length].reshape(-1, 1) + sample["segment"] = segment return sample start_idx = 0 while True: batch = _make( - df=df, + values_target=values_target, + values_real=values_real, + segment=segment, start_idx=start_idx, decoder_length=decoder_length, ) @@ -142,7 +152,13 @@ def _make(df: pd.DataFrame, start_idx: int, decoder_length: int) -> Optional[dic start_idx += decoder_length if start_idx < len(df): resid_length = len(df) - decoder_length - batch = _make(df=df, start_idx=resid_length, decoder_length=decoder_length) + batch = _make( + values_target=values_target, + values_real=values_real, + segment=segment, + start_idx=resid_length, + decoder_length=decoder_length + ) if batch is not None: yield batch diff --git a/etna/models/nn/rnn.py b/etna/models/nn/rnn.py index 7dd410812..b16b75ffe 100644 --- a/etna/models/nn/rnn.py +++ b/etna/models/nn/rnn.py @@ -131,8 +131,25 @@ def step(self, batch: RNNBatch, *args, **kwargs): # type: ignore def make_samples(self, df: pd.DataFrame, encoder_length: int, decoder_length: int) -> Iterator[dict]: """Make samples from segment DataFrame.""" + values_real = ( + df.select_dtypes(include=[np.number]) + .assign(target_shifted=df["target"].shift(1)) + .drop(["target"], axis=1) + .pipe(lambda x: x[["target_shifted"] + [i for i in x.columns if i != "target_shifted"]]) + .values + ) + values_target = df["target"].values + segment = df["segment"].values[0] + + def _make( + values_real: np.ndarray, + values_target: np.ndarray, + segment: str, + start_idx: int, + encoder_length: int, + decoder_length: int, + ) -> Optional[dict]: - def _make(df: pd.DataFrame, start_idx: int, encoder_length: int, decoder_length: int) -> Optional[dict]: sample: Dict[str, Any] = { "encoder_real": list(), "decoder_real": list(), @@ -140,43 +157,33 @@ def _make(df: pd.DataFrame, start_idx: int, encoder_length: int, decoder_length: "decoder_target": list(), "segment": None, } - total_length = len(df["target"]) + total_length = len(values_target) total_sample_length = encoder_length + decoder_length if total_sample_length + start_idx > total_length: return None # Get shifted target and concatenate it with real values features - sample["decoder_real"] = ( - df.select_dtypes(include=[np.number]) - .pipe(lambda x: x[["target"] + [i for i in x.columns if i != "target"]]) - .values[start_idx + encoder_length : start_idx + encoder_length + decoder_length] - ) - sample["decoder_real"][:, 0] = ( - df["target"].shift(1).values[start_idx + encoder_length : start_idx + encoder_length + decoder_length] - ) + sample["decoder_real"] = values_real[start_idx + encoder_length : start_idx + total_sample_length] # Get shifted target and concatenate it with real values features - sample["encoder_real"] = ( - df.select_dtypes(include=[np.number]) - .pipe(lambda x: x[["target"] + [i for i in x.columns if i != "target"]]) - .values[start_idx : start_idx + encoder_length] - ) - sample["encoder_real"][:, 0] = df["target"].shift(1).values[start_idx : start_idx + encoder_length] + sample["encoder_real"] = values_real[start_idx : start_idx + encoder_length] sample["encoder_real"] = sample["encoder_real"][1:] - target = df["target"].values[start_idx : start_idx + encoder_length + decoder_length].reshape(-1, 1) + target = values_target[start_idx : start_idx + encoder_length + decoder_length].reshape(-1, 1) sample["encoder_target"] = target[1:encoder_length] sample["decoder_target"] = target[encoder_length:] - sample["segment"] = df["segment"].values[0] + sample["segment"] = segment return sample start_idx = 0 while True: batch = _make( - df=df, + values_target=values_target, + values_real=values_real, + segment=segment, start_idx=start_idx, encoder_length=encoder_length, decoder_length=decoder_length, From e2db1e78df1f36146e3a72f5251c180e75c4f213 Mon Sep 17 00:00:00 2001 From: Yakov Malyshev Date: Mon, 5 Jun 2023 17:32:20 +0300 Subject: [PATCH 2/4] Reformat code --- etna/models/nn/mlp.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/etna/models/nn/mlp.py b/etna/models/nn/mlp.py index 5c1ff81cc..f345c1ec4 100644 --- a/etna/models/nn/mlp.py +++ b/etna/models/nn/mlp.py @@ -110,19 +110,13 @@ def step(self, batch: MLPBatch, *args, **kwargs): # type: ignore def make_samples(self, df: pd.DataFrame, encoder_length: int, decoder_length: int) -> Iterable[dict]: """Make samples from segment DataFrame.""" values_real = ( - df.select_dtypes(include=[np.number]) - .pipe(lambda x: x[[i for i in x.columns if i != "target"]]) - .values + df.select_dtypes(include=[np.number]).pipe(lambda x: x[[i for i in x.columns if i != "target"]]).values ) values_target = df["target"].values segment = df["segment"].values[0] def _make( - values_target: np.ndarray, - values_real: np.ndarray, - segment: str, - start_idx: int, - decoder_length: int + values_target: np.ndarray, values_real: np.ndarray, segment: str, start_idx: int, decoder_length: int ) -> Optional[dict]: sample: Dict[str, Any] = {"decoder_real": list(), "decoder_target": list(), "segment": None} @@ -157,7 +151,7 @@ def _make( values_real=values_real, segment=segment, start_idx=resid_length, - decoder_length=decoder_length + decoder_length=decoder_length, ) if batch is not None: yield batch From e215a5c0581c102111adc937bb0d9d01b23934e3 Mon Sep 17 00:00:00 2001 From: Yakov Malyshev Date: Mon, 5 Jun 2023 22:22:57 +0300 Subject: [PATCH 3/4] Change CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38f877bf8..6e9ab30d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Inherit `SaveMixin` from `AbstractSaveable` for mypy checker ([#1261](https://github.com/tinkoff-ai/etna/pull/1261)) - Update requirements for `holidays` and `scipy`, change saving library from `pickle` to `dill` in `SaveMixin` ([#1268](https://github.com/tinkoff-ai/etna/pull/1268)) - Update requirement for `ruptures`, add requirement for `sqlalchemy` ([#1276](https://github.com/tinkoff-ai/etna/pull/1276)) -- +- `make_samples` of `RNNNet` and `MLPNet` ([#1281](https://github.com/tinkoff-ai/etna/pull/1281)) +- ### Fixed - Fix `plot_backtest` and `plot_backtest_interactive` on one-step forecast ([1260](https://github.com/tinkoff-ai/etna/pull/1260)) - Fix `BaseReconciliator` to work on `pandas==1.1.5` ([#1229](https://github.com/tinkoff-ai/etna/pull/1229)) From dbbc7a7eb84b0b7b73097025449f56cdfe721286 Mon Sep 17 00:00:00 2001 From: Yakov Malyshev Date: Tue, 6 Jun 2023 13:55:27 +0300 Subject: [PATCH 4/4] Change CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e9ab30d7..65cc85594 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Inherit `SaveMixin` from `AbstractSaveable` for mypy checker ([#1261](https://github.com/tinkoff-ai/etna/pull/1261)) - Update requirements for `holidays` and `scipy`, change saving library from `pickle` to `dill` in `SaveMixin` ([#1268](https://github.com/tinkoff-ai/etna/pull/1268)) - Update requirement for `ruptures`, add requirement for `sqlalchemy` ([#1276](https://github.com/tinkoff-ai/etna/pull/1276)) -- `make_samples` of `RNNNet` and `MLPNet` ([#1281](https://github.com/tinkoff-ai/etna/pull/1281)) +- Optimize `make_samples` of `RNNNet` and `MLPNet` ([#1281](https://github.com/tinkoff-ai/etna/pull/1281)) - ### Fixed - Fix `plot_backtest` and `plot_backtest_interactive` on one-step forecast ([1260](https://github.com/tinkoff-ai/etna/pull/1260))