Fix performance of DeepARModel and TFTModel (#1322)

tinkoff-ai · Jul 24, 2023 · cd93027 · github-actions · Jul 24, 2023
1 parent 75e8fc1
commit cd93027
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 8 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,7 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 - 
 - 
-- 
+- Fix performance of `DeepARModel` and `TFTModel` ([#1322](https://github.com/tinkoff-ai/etna/pull/1322))
 - `mrmr` feature selection working with categoricals ([#1311](https://github.com/tinkoff-ai/etna/pull/1311))
 - Fix version of `statsforecast` to 1.4 to avoid dependency conflicts during installation ([#1313](https://github.com/tinkoff-ai/etna/pull/1313))
 - Add inverse transformation into `predict` method of pipelines ([#1314](https://github.com/tinkoff-ai/etna/pull/1314))

diff --git a/etna/models/nn/utils.py b/etna/models/nn/utils.py
@@ -125,17 +125,16 @@ def create_train_dataset(self, ts: TSDataset) -> TimeSeriesDataSet:
         """
         df_flat = ts.to_pandas(flatten=True)
         df_flat = df_flat.dropna()
-        self.min_timestamp = df_flat.timestamp.min()
+
+        mapping_time_idx = {x: i for i, x in enumerate(ts.index)}
+        df_flat["time_idx"] = df_flat["timestamp"].map(mapping_time_idx)
+
+        self.min_timestamp = df_flat["timestamp"].min()
 
         if self.time_varying_known_categoricals:
             for feature_name in self.time_varying_known_categoricals:
                 df_flat[feature_name] = df_flat[feature_name].astype(str)
 
-        # making time_idx feature.
-        # it's needed for pytorch-forecasting for proper train-test split.
-        # it should be incremented by 1 for every new timestamp.
-        df_flat["time_idx"] = df_flat["timestamp"].apply(lambda x: determine_num_steps(self.min_timestamp, x, ts.freq))
-
         pf_dataset = TimeSeriesDataSet(
             df_flat,
             time_idx="time_idx",
@@ -192,7 +191,12 @@ def create_inference_dataset(self, ts: TSDataset, horizon: int) -> TimeSeriesDat
         df_flat = df_flat[df_flat.timestamp >= self.min_timestamp]
         df_flat["target"] = df_flat["target"].fillna(0)
 
-        df_flat["time_idx"] = df_flat["timestamp"].apply(lambda x: determine_num_steps(self.min_timestamp, x, ts.freq))
+        inference_min_timestamp = df_flat["timestamp"].min()
+        time_idx_shift = determine_num_steps(
+            start_timestamp=self.min_timestamp, end_timestamp=inference_min_timestamp, freq=ts.freq
+        )
+        mapping_time_idx = {x: i + time_idx_shift for i, x in enumerate(ts.index)}
+        df_flat["time_idx"] = df_flat["timestamp"].map(mapping_time_idx)
 
         if self.time_varying_known_categoricals:
             for feature_name in self.time_varying_known_categoricals: