Skip to content

Commit

Permalink
Added NBEATS Module
Browse files Browse the repository at this point in the history
  • Loading branch information
fornasari12 committed Nov 23, 2021
1 parent 2af8743 commit 85f6d88
Show file tree
Hide file tree
Showing 9 changed files with 333 additions and 170 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,5 @@ dmypy.json
lightning_logs/
optuna_test/
test_study.pkl
model/n_beats/
model/n_beats/
model/temporal_fusion_transformer/
16 changes: 9 additions & 7 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ general:
scaler_path_nbeats: "/Volumes/GoogleDrive/My Drive/Colab_Notebooks/model/n_beats/scaler.pickle"
model_path_nbeats: "/Volumes/GoogleDrive/My Drive/Colab_Notebooks/model/n_beats/n_beats.pickle"
model:
lags: None
sma: [ 12 ]
lags: null
sma: null
time_varying_known_categoricals: [
"hour",
"month",
Expand All @@ -29,10 +29,11 @@ model:
sample: "60min"
cutoff: 0.70
model_path: "/content/drive/MyDrive/Colab_Notebooks/model/tft.pt"
hyperparameters_path: "/content/drive/MyDrive/Colab_Notebooks/model/tuning_study.pkl"

model_local:
lags: None
sma: [ 12 ]
lags: null
sma: null
time_varying_known_categoricals: [
"hour",
"month",
Expand All @@ -54,17 +55,18 @@ model_local:
sample: "60min"
cutoff: 0.70
model_path: "model/temporal_fusion_transformer/tft_local.pt"
hyperparameters_path: "model/temporal_fusion_transformer/tuning_study.pkl"

nbeats:
input_chunk_length: 48
output_chunk_length: 24
num_stacks: 10
num_blocks: 1
num_layers: 10
num_layers: 15
layer_widths: 512
n_epochs: 500
n_epochs: 3000
nr_epochs_val_period: 1
batch_size: 800
batch_size: 1024
model_name: 'nbeats_run'
sample: "60min"
cutoff: 0.70
Expand Down
102 changes: 102 additions & 0 deletions errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

errors = pd.read_csv("model/n_beats/errors.csv", index_col=0)

for data_name in [
# "Twitter_volume_AAPL",
"Twitter_volume_UPS",
"Twitter_volume_KO",
"Twitter_volume_GOOG",
"Twitter_volume_CVS",
"Twitter_volume_FB",
"Twitter_volume_IBM",
"Twitter_volume_CRM",
"Twitter_volume_PFE",
"Twitter_volume_AMZN"
]:
errors = pd.read_csv(f"model/n_beats/errors_{data_name}.csv", index_col=0)
error_metrics = pd.DataFrame()
for step, df_step in errors.groupby(by="step"):

print("------------------------------------------\n",
f"Step {step}")

df_step = df_step.dropna()

for model in ["tft", "ets", "nbeats"]:

y_obs = df_step["observed"].values
y_hat = df_step[model].values

mse = mean_squared_error(y_obs, y_hat)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_obs, y_hat)

df = pd.DataFrame(
{
"model": model,
"mse": mse,
"rmse": rmse,
"mae": mae,
},
index=[step]
)

error_metrics = pd.concat([error_metrics, df], axis=0)

print(
f"model: {model}\n",
f"mean_squared_error: {mse}\n",
f"root_mean_squared_error: {rmse}\n",
f"mean_absolute_error: {mae}\n",
)

error_metrics = error_metrics.reset_index().rename(columns={"index": "step"})

ax = pd.Series(
index=range(1, 25, 1),
data=error_metrics[error_metrics["model"] == "tft"]["rmse"].values
).plot(
figsize=(10, 6),
style="--",
marker="o",
color="green",
label="Temporal Fusion Transformer",
legend=True,
)

pd.Series(
index=range(1, 25, 1),
data=error_metrics[error_metrics["model"] == "ets"]["rmse"].values
).plot(
ax=ax,
style="--",
marker="o",
color="red",
legend=True,
label="Triple Exponential Smoothing"
)

pd.Series(
index=range(1, 25, 1),
data=error_metrics[error_metrics["model"] == "nbeats"]["rmse"].values
).plot(
ax=ax,
style="--",
marker="o",
color="blue",
legend=True,
label="N-BEATS"
)

plt.title('RMSE for 1-24 forecasting horizon', fontsize=14)
plt.xlabel('Horizon (steps ahead)', fontsize=14)
plt.ylabel('RMSE', fontsize=14)
plt.grid(True)
plt.show()

print(data_name)

113 changes: 69 additions & 44 deletions forecast_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer

from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler

from config import load_config
from load_data import LoadData

Expand Down Expand Up @@ -109,6 +106,8 @@

model.load_state_dict(torch.load("/Volumes/GoogleDrive/My Drive/Colab_Notebooks/model/tft.pt"))

errors = pd.DataFrame()

for data_name in train_data.id.unique().tolist():

# _________________________________________________________________________________________________________________
Expand All @@ -132,44 +131,70 @@

# _________________________________________________________________________________________________________________
# Forecast:
for start in range(0, 80, 1):

# Update ES with new Data.
model_es.update(
test_data_es[start:(start + max_encoder_length)],
update_params=False,
)

# Make forecast for es & tft:
y_hat_es = model_es.predict(list(range(1, max_prediction_length + 1)))
y_hat_tft = pd.Series(
index=y_hat_es.index,
data=model.predict(
test_data_tft[start:(start + max_encoder_length)],
mode="prediction",
return_x=True)[0][0].tolist()
)
y_hat_nbeats = pd.read_csv(
f"/Volumes/GoogleDrive/My Drive/Colab_Notebooks/data_nbeats/{data_name}_{start}.csv"
)
y_hat_nbeats = y_hat_nbeats.set_index(pd.to_datetime(y_hat_nbeats.time))
y_hat_nbeats = y_hat_nbeats.drop(columns="time")

# Plot forecasts and observed values
ax = test_data_es[start: start + max_encoder_length + max_prediction_length].plot(
figsize=(10, 6),
marker="o",
color="black",
label="observed"
)
y_hat_es.plot(ax=ax, style="--", marker="o", color="red",
label="exponential_smoothing")
y_hat_tft.plot(ax=ax, style="--", marker="o", color="blue",
label="temporal_fusion_transformer")
y_hat_nbeats.plot(ax=ax, style="--", marker="o", color="green",
label="N=BEATS")

plt.title(f"Forecasts for {data_name}")
plt.pause(0.05)

plt.show()
errors_data_name = pd.DataFrame()
for start in range(0, 120, 1):

try:
# Update ES with new Data.
model_es.update(
test_data_es[start:(start + max_encoder_length)],
update_params=False,
)

# Make forecast for es & tft:
y_hat_es = model_es.predict(list(range(1, max_prediction_length + 1)))
y_hat_tft = pd.Series(
index=y_hat_es.index,
data=model.predict(
test_data_tft[start:(start + max_encoder_length)],
mode="prediction",
return_x=True)[0][0].tolist()
)
y_hat_nbeats = pd.read_csv(
f"/Volumes/GoogleDrive/My Drive/Colab_Notebooks/data_nbeats/{data_name}_{start}.csv"
)
y_hat_nbeats = y_hat_nbeats.set_index(pd.to_datetime(y_hat_nbeats.time))
y_hat_nbeats = y_hat_nbeats.drop(columns="time")

y_obs = test_data_es[start + max_encoder_length: start + max_encoder_length + max_prediction_length]

# Plot forecasts and observed values
# ax = test_data_es[start: start + max_encoder_length + max_prediction_length].plot(
# figsize=(10, 6),
# marker="o",
# color="black",
# label="observed"
# )
# y_hat_es.plot(ax=ax, style="--", marker="o", color="red",
# label="exponential_smoothing")
# y_hat_tft.plot(ax=ax, style="--", marker="o", color="blue",
# label="temporal_fusion_transformer")
# y_hat_nbeats.plot(ax=ax, style="--", marker="o", color="green",
# label="N=BEATS")
#
df_errors = pd.concat([y_obs, y_hat_tft, y_hat_es, y_hat_nbeats], axis=1).reset_index(drop=True)
df_errors.columns = ["observed", "tft", "ets", "nbeats"]
df_errors["step"] = [step for step in range(1, max_prediction_length + 1, 1)]

errors = pd.concat([errors, df_errors], axis=0)
errors_data_name = pd.concat([errors_data_name, df_errors], axis=0)

except Exception as e:
print(f"problem at data_name:{data_name} & step: {start}")
continue

#
# plt.title(f"Forecasts for {data_name}")
# # plt.pause(0.05)
# #
# plt.show(block=False)
# plt.pause(0.0005)
# plt.close()

errors_data_name.to_csv(f"model/n_beats/errors_{data_name}.csv")
print(data_name)

errors.to_csv("model/n_beats/errors.csv")



2 changes: 1 addition & 1 deletion forecast_nbeats.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
)
# _________________________________________________________________________________________________________________
# Forecast:
for start in range(0, 80, 1):
for start in range(0, 300, 1):

y_hat_nbeats = model_n_beats.predict(
n=max_prediction_length,
Expand Down
2 changes: 1 addition & 1 deletion load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def _create_lagged_variables(

df_lags = pd.DataFrame()
for lag in range(lags, 0, -1):
df_lags[f'(t-{lag})'] = \
df_lags[f'lag_{lag}'] = \
df[column_name].shift(lag).fillna(method="bfill")

return pd.concat([df_lags, df], axis=1)
Expand Down
Binary file removed model/temporal_fusion_transformer/tft_local.pt
Binary file not shown.
24 changes: 8 additions & 16 deletions train_tft.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,25 +52,17 @@

lags = spec[model_key]["lags"]
sma = spec[model_key]["sma"]
sma_columns = [f"sma_{sma}" for sma in sma]

if lags != "None":
lags_columns = [f"(t-{lag})" for lag in range(lags, 0, -1)]
time_varying_known_reals = spec[model_key]["time_varying_known_reals"]

time_varying_known_reals = (
spec[model_key]["time_varying_known_reals"] +
lags_columns +
sma_columns
)
if lags == "None":
lags = None
time_varying_known_reals = (
spec[model_key]["time_varying_known_reals"] +
sma_columns
)
if lags:
lags_columns = [f"lag_{lag}" for lag in range(lags, 0, -1)]
time_varying_known_reals = time_varying_known_reals + lags_columns

if sma:
sma_columns = [f"sma_{sma}" for sma in sma]
time_varying_known_reals = time_varying_known_reals + sma_columns

else:
time_varying_known_reals = spec[model_key]["time_varying_known_reals"]

time_varying_known_categoricals = spec[model_key]["time_varying_known_categoricals"]
max_prediction_length = spec[model_key]["max_prediction_length"]
Expand Down
Loading

0 comments on commit 85f6d88

Please sign in to comment.