Skip to content

Commit

Permalink
tmp: m4
Browse files Browse the repository at this point in the history
  • Loading branch information
zezhishao committed Dec 11, 2023
1 parent f6d0aeb commit e1e8c54
Show file tree
Hide file tree
Showing 12 changed files with 969 additions and 4 deletions.
20 changes: 20 additions & 0 deletions baselines/MLP/MASE.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import numpy as np
import torch

def mase(prediction: torch.Tensor, inputs: torch.Tensor, target: torch.Tensor, frequency: int = 1, null_val: float = np.nan) -> torch.Tensor:
    """Compute the Mean Absolute Scaled Error (MASE) of a forecast.

    MASE, as defined in "Scaled Errors" (https://robjhyndman.com/papers/mase.pdf),
    is the mean absolute forecast error divided by the mean absolute error of
    the in-sample seasonal-naive forecast, i.e. the forecast that repeats the
    value observed ``frequency`` steps earlier.

    Args:
        prediction: Forecast values. Shape: batch, time_o.
        inputs: In-sample (history) values. Shape: batch, time_i. A 1-D
            tensor is treated as one single series.
        target: Ground-truth values. Shape: batch, time_o.
        frequency: Lag (in time steps) of the seasonal-naive forecast.
        null_val: Not used by the computation; kept so that existing callers
            passing it keep working.

    Returns:
        A CPU tensor of shape (1,) holding the MASE aggregated over every
        element. The value is NaN when the history is too short to contain
        two values ``frequency`` steps apart.
    """
    prediction = prediction.detach().cpu().numpy()
    target = target.detach().cpu().numpy()
    history = inputs.detach().cpu().numpy()

    # The seasonal differences must be taken along time. Batched inputs are
    # batch-first, so time is axis 1; slicing axis 0 there would compare
    # different series with each other instead of a series with its own past.
    time_axis = 1 if history.ndim > 1 else 0
    history = np.moveaxis(history, time_axis, 0)

    forecast_error = np.mean(np.abs(prediction - target))
    naive_error = np.mean(np.abs(history[:-frequency] - history[frequency:]))
    return torch.Tensor([forecast_error / naive_error])
114 changes: 114 additions & 0 deletions baselines/MLP/MLP_M4_Monthly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import os
import sys

# TODO: remove it when basicts can be installed by pip
sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
from basicts.runners import M4Runner
from basicts.losses import masked_mae
from basicts.data import M4ForecastingDataset
from basicts.utils import partial

from .mlp_arch import MultiLayerPerceptron
from .MASE import mase

def _dataloader_cfg(data_dir, shuffle):
    """Build the dataloader settings used by one split (train/val/test).

    Every split reads from the same directory with the same loader options;
    only the shuffle flag differs between them.
    """
    loader = EasyDict()
    # read data
    loader.DIR = data_dir
    # dataloader args, optional
    loader.BATCH_SIZE = 32
    loader.PREFETCH = False
    loader.SHUFFLE = shuffle
    loader.NUM_WORKERS = 2
    loader.PIN_MEMORY = False
    return loader


seasonal_pattern = "Monthly"

# Lookup tables keyed by seasonal pattern: the lag handed to the MASE metric
# and the number of future steps to predict.
_SEASONAL_PERIODS = {"Yearly": 1, "Quarterly": 4, "Monthly": 12, "Weekly": 1, "Daily": 1, "Hourly": 24}
_OUTPUT_LENGTHS = {"Yearly": 6, "Quarterly": 8, "Monthly": 18, "Weekly": 13, "Daily": 14, "Hourly": 48}

frequence = _SEASONAL_PERIODS[seasonal_pattern]
# history length expressed in multiples of the output length
lookback_size = 2  # [2, 3, 4, 5, 6, 7] (presumably the other candidate values)
CFG = EasyDict()

# ================= general ================= #
CFG.DESCRIPTION = "Multi-layer perceptron model configuration"
CFG.RUNNER = M4Runner
CFG.DATASET_CLS = M4ForecastingDataset
CFG.DATASET_NAME = "_".join(["M4", seasonal_pattern])
CFG.DATASET_OUTPUT_LEN = _OUTPUT_LENGTHS[seasonal_pattern]
CFG.DATASET_INPUT_LEN = lookback_size * CFG.DATASET_OUTPUT_LEN
CFG.GPU_NUM = 1
CFG.METRICS = {"MASE": partial(mase, frequency=frequence)}
CFG.TARGET_METRICS = "MASE"

# all three splits read from the same dataset directory
_DATA_DIR = "datasets/" + CFG.DATASET_NAME

# ================= environment ================= #
CFG.ENV = EasyDict()
CFG.ENV.SEED = 1
CFG.ENV.CUDNN = EasyDict()
CFG.ENV.CUDNN.ENABLED = True

# ================= model ================= #
CFG.MODEL = EasyDict()
CFG.MODEL.NAME = "MultiLayerPerceptron"
CFG.MODEL.ARCH = MultiLayerPerceptron
CFG.MODEL.PARAM = dict(
    history_seq_len=CFG.DATASET_INPUT_LEN,
    prediction_seq_len=CFG.DATASET_OUTPUT_LEN,
    hidden_dim=32,
)
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]

# ================= optim ================= #
CFG.TRAIN = EasyDict()
CFG.TRAIN.LOSS = masked_mae
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = dict(lr=0.002, weight_decay=1.0e-5, eps=1.0e-8)
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = dict(
    milestones=[1, 30, 38, 46, 54, 62, 70, 80],
    gamma=0.5,
)

# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = dict(max_norm=5.0)
CFG.TRAIN.NUM_EPOCHS = 100
# checkpoint folder is named "<model name>_<number of epochs>"
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
    "checkpoints",
    "{0}_{1}".format(CFG.MODEL.NAME, CFG.TRAIN.NUM_EPOCHS)
)
# train data (the only split that is shuffled)
CFG.TRAIN.DATA = _dataloader_cfg(_DATA_DIR, True)

# ================= validate ================= #
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
# validating data
CFG.VAL.DATA = _dataloader_cfg(_DATA_DIR, False)

# ================= test ================= #
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
# evaluation
# test data
CFG.TEST.DATA = _dataloader_cfg(_DATA_DIR, False)
113 changes: 113 additions & 0 deletions baselines/MLP/MLP_M4_Weekly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import os
import sys

# TODO: remove it when basicts can be installed by pip
sys.path.append(os.path.abspath(__file__ + "/../../.."))
from easydict import EasyDict
from basicts.runners import M4Runner
from basicts.losses import masked_mae
from basicts.data import M4ForecastingDataset
from basicts.utils import partial

from .mlp_arch import MultiLayerPerceptron
from .MASE import mase

def _dataloader_cfg(data_dir, shuffle):
    """Build the dataloader settings used by one split (train/val/test).

    Every split reads from the same directory with the same loader options;
    only the shuffle flag differs between them.
    """
    loader = EasyDict()
    # read data
    loader.DIR = data_dir
    # dataloader args, optional
    loader.BATCH_SIZE = 32
    loader.PREFETCH = False
    loader.SHUFFLE = shuffle
    loader.NUM_WORKERS = 2
    loader.PIN_MEMORY = False
    return loader


seasonal_pattern = "Weekly"

# Lookup tables keyed by seasonal pattern: the lag handed to the MASE metric
# and the number of future steps to predict.
_SEASONAL_PERIODS = {"Yearly": 1, "Quarterly": 4, "Monthly": 12, "Weekly": 1, "Daily": 1, "Hourly": 24}
_OUTPUT_LENGTHS = {"Yearly": 6, "Quarterly": 8, "Monthly": 18, "Weekly": 13, "Daily": 14, "Hourly": 48}

frequence = _SEASONAL_PERIODS[seasonal_pattern]
# History length expressed in multiples of the output length. Deriving the
# input length (2 * 13 = 26 for Weekly) instead of hard-coding 26 keeps it in
# sync with the output-length table when seasonal_pattern is changed.
lookback_size = 2
CFG = EasyDict()

# ================= general ================= #
CFG.DESCRIPTION = "Multi-layer perceptron model configuration"
CFG.RUNNER = M4Runner
CFG.DATASET_CLS = M4ForecastingDataset
CFG.DATASET_NAME = "M4_" + seasonal_pattern
# INPUT_LEN is assigned before OUTPUT_LEN to keep the original key order of CFG
CFG.DATASET_INPUT_LEN = lookback_size * _OUTPUT_LENGTHS[seasonal_pattern]
CFG.DATASET_OUTPUT_LEN = _OUTPUT_LENGTHS[seasonal_pattern]
CFG.GPU_NUM = 1
CFG.METRICS = {"MASE": partial(mase, frequency=frequence)}
CFG.TARGET_METRICS = "MASE"

# all three splits read from the same dataset directory
_DATA_DIR = "datasets/" + CFG.DATASET_NAME

# ================= environment ================= #
CFG.ENV = EasyDict()
CFG.ENV.SEED = 1
CFG.ENV.CUDNN = EasyDict()
CFG.ENV.CUDNN.ENABLED = True

# ================= model ================= #
CFG.MODEL = EasyDict()
CFG.MODEL.NAME = "MultiLayerPerceptron"
CFG.MODEL.ARCH = MultiLayerPerceptron
CFG.MODEL.PARAM = {
    "history_seq_len": CFG.DATASET_INPUT_LEN,
    "prediction_seq_len": CFG.DATASET_OUTPUT_LEN,
    "hidden_dim": 32
}
CFG.MODEL.FORWARD_FEATURES = [0]
CFG.MODEL.TARGET_FEATURES = [0]

# ================= optim ================= #
CFG.TRAIN = EasyDict()
CFG.TRAIN.LOSS = masked_mae
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "Adam"
CFG.TRAIN.OPTIM.PARAM = {
    "lr": 0.002,
    "weight_decay": 1.0e-5,
    "eps": 1.0e-8
}
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
    "milestones": [1, 30, 38, 46, 54, 62, 70, 80],
    "gamma": 0.5
}

# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {
    "max_norm": 5.0
}
CFG.TRAIN.NUM_EPOCHS = 100
# checkpoint folder is named "<model name>_<number of epochs>"
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
    "checkpoints",
    "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
)
# train data (the only split that is shuffled)
CFG.TRAIN.DATA = _dataloader_cfg(_DATA_DIR, True)

# ================= validate ================= #
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
# validating data
CFG.VAL.DATA = _dataloader_cfg(_DATA_DIR, False)

# ================= test ================= #
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
# evaluation
# test data
CFG.TEST.DATA = _dataloader_cfg(_DATA_DIR, False)
2 changes: 1 addition & 1 deletion basicts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .launcher import launch_training, launch_runner
from .runners import BaseRunner

__version__ = "0.3.5"
__version__ = "0.3.6"

__all__ = ["__version__", "launch_training", "launch_runner", "BaseRunner"]
3 changes: 2 additions & 1 deletion basicts/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from .registry import SCALER_REGISTRY
from .dataset_zoo.simple_tsf_dataset import TimeSeriesForecastingDataset
from .dataset_zoo.m4_dataset import M4ForecastingDataset

__all__ = ["SCALER_REGISTRY", "TimeSeriesForecastingDataset"]
__all__ = ["SCALER_REGISTRY", "TimeSeriesForecastingDataset", "M4ForecastingDataset"]

# fix bugs on Windows systems and on jupyter
project_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
Expand Down
84 changes: 84 additions & 0 deletions basicts/data/dataset_zoo/m4_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os
import random

import torch
from torch.utils.data import Dataset

from ...utils import load_pkl


class M4ForecastingDataset(Dataset):
    """
    BasicTS tries its best to follow the commonly-used processing approaches of M4 dataset, while also providing more flexible interfaces.
    M4 dataset differs from general MTS datasets in the following aspects:
        - M4 dataset is a univariate time series dataset, which does not sample in a synchronized manner.
          In the state-of-the-art M4 prediction solution, N-BEATS [1], the authors first sample ids of the time series and then randomly sample the time series data for each time series.
        - Padding and masking are used to make training more flexible and robust.
        - There is no normalization in M4 dataset.
        - There is no validation dataset in M4 dataset.
        - The test data is the last sample of each time series.
        - The future sequence length is fixed for different subsets.
    Reference:
        [1] N-BEATS: Neural basis expansion analysis for interpretable time series forecasting
        [2] https://github.com/ServiceNow/N-BEATS/blob/master/common/sampler.py
    """

    def __init__(self, data_file_path: str, index_file_path: str, mask_file_path: str, mode: str) -> None:
        """Load the padded series, their masks and the sample index of one split.

        Args:
            data_file_path (str): data file path
            index_file_path (str): index file path
            mask_file_path (str): mask file path
            mode (str): which split to load, "train" or "test"

        Raises:
            AssertionError: mode is neither "train" nor "test"
            FileNotFoundError: one of the three files does not exist
        """

        super().__init__()
        assert mode in ["train", "test"], "error mode"
        self._check_if_file_exists(data_file_path, index_file_path, mask_file_path)
        # read raw data (M4 is used without normalization, see the class docstring)
        self.data = load_pkl(data_file_path)[mode] # padded data: List[List]
        self.mask = load_pkl(mask_file_path)[mode] # padded mask: List[List]
        # read index
        self.index = load_pkl(index_file_path)[mode] # train/test index of each time series: List[List]

    def _check_if_file_exists(self, data_file_path: str, index_file_path: str, mask_file_path: str):
        """Check if data file, index file and mask file exist.
        Args:
            data_file_path (str): data file path
            index_file_path (str): index file path
            mask_file_path (str): mask file path
        Raises:
            FileNotFoundError: no data file
            FileNotFoundError: no index file
            FileNotFoundError: no mask file
        """

        if not os.path.isfile(data_file_path):
            raise FileNotFoundError("BasicTS can not find data file {0}".format(data_file_path))
        if not os.path.isfile(index_file_path):
            raise FileNotFoundError("BasicTS can not find index file {0}".format(index_file_path))
        if not os.path.isfile(mask_file_path):
            raise FileNotFoundError("BasicTS can not find mask file {0}".format(mask_file_path))

    @staticmethod
    def _to_column_tensor(values) -> torch.Tensor:
        """Convert a sequence of L values into a float tensor of shape L x 1."""

        return torch.Tensor(values).unsqueeze(1).float()

    def __getitem__(self, ts_id: int) -> tuple:
        """Get a sample.
        Args:
            ts_id (int): the iteration index, i.e., the time series id (not the self.index).
        Returns:
            tuple: future_data, history_data, future_mask, history_mask, each a float tensor of shape L x 1.
        Raises:
            ValueError: the time series has no entry in the index.
        """

        # each entry is (history start, history end = future start, future end)
        windows = self.index[ts_id]
        if len(windows) == 0:
            raise ValueError("BasicTS can not find any sample index for time series {0}".format(ts_id))
        # randomly select one sample of this time series (no copy of the index is needed)
        window = random.choice(windows)
        start, split, end = window[0], window[1], window[2]

        series, series_mask = self.data[ts_id], self.mask[ts_id]
        history_data = self._to_column_tensor(series[start:split])
        future_data = self._to_column_tensor(series[split:end])
        history_mask = self._to_column_tensor(series_mask[start:split])
        future_mask = self._to_column_tensor(series_mask[split:end])

        return future_data, history_data, future_mask, history_mask

    def __len__(self):
        """Dataset length (=number of time series)
        Returns:
            int: dataset length
        """

        return len(self.data)
4 changes: 3 additions & 1 deletion basicts/runners/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from .base_tsf_runner import BaseTimeSeriesForecastingRunner
from .runner_zoo.simple_tsf_runner import SimpleTimeSeriesForecastingRunner
from .runner_zoo.no_bp_runner import NoBPRunner
from .runner_zoo.m4_tsf_runner import M4Runner

__all__ = ["BaseRunner", "BaseTimeSeriesForecastingRunner",
"SimpleTimeSeriesForecastingRunner", "NoBPRunner"]
"SimpleTimeSeriesForecastingRunner", "NoBPRunner",
"M4Runner"]
Loading

0 comments on commit e1e8c54

Please sign in to comment.