-
Notifications
You must be signed in to change notification settings - Fork 122
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
969 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import numpy as np | ||
import torch | ||
|
||
def mase(prediction: torch.Tensor, inputs: torch.Tensor, target: torch.Tensor, frequency: int = 1, null_val: float = np.nan) -> torch.Tensor:
    """Mean Absolute Scaled Error (MASE).

    MASE as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf:
    the mean absolute forecast error divided by the mean absolute error of the
    in-sample seasonal-naive forecast (each value compared with the value
    ``frequency`` steps earlier).

    :param prediction: Forecast values. Shape: batch, time_o
    :param inputs: In-sample (history) values. Shape: batch, time_i.
        A 1-D array is treated as a single series.
    :param target: Target values. Shape: batch, time_o
    :param frequency: Lag, in time steps, of the seasonal-naive forecast.
        If it is not smaller than the in-sample length, no pairs exist and the result is NaN.
    :param null_val: Not used by this implementation.
    :return: One-element tensor holding the MASE aggregated over all given values
    """
    prediction = prediction.detach().cpu().numpy()
    target = target.detach().cpu().numpy()
    inputs = inputs.detach().cpu().numpy()

    forecast_error = np.mean(np.abs(prediction - target))

    # The naive forecast must be differenced along TIME. For batched input the
    # documented layout is (batch, time, ...), so time is axis 1; slicing axis 0
    # would subtract unrelated series from each other.
    time_axis = 1 if inputs.ndim > 1 else 0
    series = np.moveaxis(inputs, time_axis, 0)
    naive_error = np.mean(np.abs(series[:-frequency] - series[frequency:]))

    return torch.Tensor([float(forecast_error / naive_error)])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import os | ||
import sys | ||
|
||
# TODO: remove it when basicts can be installed by pip | ||
sys.path.append(os.path.abspath(__file__ + "/../../..")) | ||
from easydict import EasyDict | ||
from basicts.runners import M4Runner | ||
from basicts.losses import masked_mae | ||
from basicts.data import M4ForecastingDataset | ||
from basicts.utils import partial | ||
|
||
from .mlp_arch import MultiLayerPerceptron | ||
from .MASE import mase | ||
|
||
seasonal_pattern = "Monthly"

# Per-subset lag passed to the MASE metric as `frequency`.
_MASE_FREQUENCY = {"Yearly": 1, "Quarterly": 4, "Monthly": 12, "Weekly": 1, "Daily": 1, "Hourly": 24}
# Per-subset output (prediction) length.
_OUTPUT_LEN = {"Yearly": 6, "Quarterly": 8, "Monthly": 18, "Weekly": 13, "Daily": 14, "Hourly": 48}

frequence = _MASE_FREQUENCY[seasonal_pattern]
lookback_size = 2  # [2, 3, 4, 5, 6, 7]
CFG = EasyDict()

# ================= general ================= #
CFG.DESCRIPTION = "Multi-layer perceptron model configuration"
CFG.RUNNER = M4Runner
CFG.DATASET_CLS = M4ForecastingDataset
CFG.DATASET_NAME = "M4_" + seasonal_pattern
CFG.DATASET_OUTPUT_LEN = _OUTPUT_LEN[seasonal_pattern]
# The input window is a multiple of the output length.
CFG.DATASET_INPUT_LEN = lookback_size * CFG.DATASET_OUTPUT_LEN
CFG.GPU_NUM = 1
CFG.METRICS = {"MASE": partial(mase, frequency=frequence)}
CFG.TARGET_METRICS = "MASE"

# ================= environment ================= #
CFG.ENV = EasyDict({
    "SEED": 1,
    "CUDNN": {"ENABLED": True},
})

# ================= model ================= #
CFG.MODEL = EasyDict({
    "NAME": "MultiLayerPerceptron",
    "ARCH": MultiLayerPerceptron,
    "PARAM": {
        "history_seq_len": CFG.DATASET_INPUT_LEN,
        "prediction_seq_len": CFG.DATASET_OUTPUT_LEN,
        "hidden_dim": 32,
    },
    "FORWARD_FEATURES": [0],
    "TARGET_FEATURES": [0],
})

# ================= optim ================= #
CFG.TRAIN = EasyDict()
CFG.TRAIN.LOSS = masked_mae
CFG.TRAIN.OPTIM = EasyDict({
    "TYPE": "Adam",
    "PARAM": {
        "lr": 0.002,
        "weight_decay": 1.0e-5,
        "eps": 1.0e-8,
    },
})
CFG.TRAIN.LR_SCHEDULER = EasyDict({
    "TYPE": "MultiStepLR",
    "PARAM": {
        "milestones": [1, 30, 38, 46, 54, 62, 70, 80],
        "gamma": 0.5,
    },
})


def _dataloader_cfg(shuffle: bool) -> EasyDict:
    """Build the data section shared by the train/val/test splits; only SHUFFLE differs."""
    return EasyDict({
        # read data
        "DIR": "datasets/" + CFG.DATASET_NAME,
        # dataloader args, optional
        "BATCH_SIZE": 32,
        "PREFETCH": False,
        "SHUFFLE": shuffle,
        "NUM_WORKERS": 2,
        "PIN_MEMORY": False,
    })


# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {"max_norm": 5.0}
CFG.TRAIN.NUM_EPOCHS = 100
# Checkpoint folder is named "<model name>_<number of epochs>".
_ckpt_folder = "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join("checkpoints", _ckpt_folder)
# train data
CFG.TRAIN.DATA = _dataloader_cfg(shuffle=True)

# ================= validate ================= #
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
# validating data
CFG.VAL.DATA = _dataloader_cfg(shuffle=False)

# ================= test ================= #
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
# evaluation
# test data
CFG.TEST.DATA = _dataloader_cfg(shuffle=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
import os | ||
import sys | ||
|
||
# TODO: remove it when basicts can be installed by pip | ||
sys.path.append(os.path.abspath(__file__ + "/../../..")) | ||
from easydict import EasyDict | ||
from basicts.runners import M4Runner | ||
from basicts.losses import masked_mae | ||
from basicts.data import M4ForecastingDataset | ||
from basicts.utils import partial | ||
|
||
from .mlp_arch import MultiLayerPerceptron | ||
from .MASE import mase | ||
|
||
seasonal_pattern = "Weekly"

# Per-subset lag passed to the MASE metric as `frequency`.
_MASE_FREQUENCY = {"Yearly": 1, "Quarterly": 4, "Monthly": 12, "Weekly": 1, "Daily": 1, "Hourly": 24}
# Per-subset output (prediction) length.
_OUTPUT_LEN = {"Yearly": 6, "Quarterly": 8, "Monthly": 18, "Weekly": 13, "Daily": 14, "Hourly": 48}

frequence = _MASE_FREQUENCY[seasonal_pattern]
CFG = EasyDict()

# ================= general ================= #
CFG.DESCRIPTION = "Multi-layer perceptron model configuration"
CFG.RUNNER = M4Runner
CFG.DATASET_CLS = M4ForecastingDataset
CFG.DATASET_NAME = "M4_" + seasonal_pattern
# Fixed input window for this configuration.
CFG.DATASET_INPUT_LEN = 26
CFG.DATASET_OUTPUT_LEN = _OUTPUT_LEN[seasonal_pattern]
CFG.GPU_NUM = 1
CFG.METRICS = {"MASE": partial(mase, frequency=frequence)}
CFG.TARGET_METRICS = "MASE"

# ================= environment ================= #
CFG.ENV = EasyDict({
    "SEED": 1,
    "CUDNN": {"ENABLED": True},
})

# ================= model ================= #
CFG.MODEL = EasyDict({
    "NAME": "MultiLayerPerceptron",
    "ARCH": MultiLayerPerceptron,
    "PARAM": {
        "history_seq_len": CFG.DATASET_INPUT_LEN,
        "prediction_seq_len": CFG.DATASET_OUTPUT_LEN,
        "hidden_dim": 32,
    },
    "FORWARD_FEATURES": [0],
    "TARGET_FEATURES": [0],
})

# ================= optim ================= #
CFG.TRAIN = EasyDict()
CFG.TRAIN.LOSS = masked_mae
CFG.TRAIN.OPTIM = EasyDict({
    "TYPE": "Adam",
    "PARAM": {
        "lr": 0.002,
        "weight_decay": 1.0e-5,
        "eps": 1.0e-8,
    },
})
CFG.TRAIN.LR_SCHEDULER = EasyDict({
    "TYPE": "MultiStepLR",
    "PARAM": {
        "milestones": [1, 30, 38, 46, 54, 62, 70, 80],
        "gamma": 0.5,
    },
})


def _dataloader_cfg(shuffle: bool) -> EasyDict:
    """Build the data section shared by the train/val/test splits; only SHUFFLE differs."""
    return EasyDict({
        # read data
        "DIR": "datasets/" + CFG.DATASET_NAME,
        # dataloader args, optional
        "BATCH_SIZE": 32,
        "PREFETCH": False,
        "SHUFFLE": shuffle,
        "NUM_WORKERS": 2,
        "PIN_MEMORY": False,
    })


# ================= train ================= #
CFG.TRAIN.CLIP_GRAD_PARAM = {"max_norm": 5.0}
CFG.TRAIN.NUM_EPOCHS = 100
# Checkpoint folder is named "<model name>_<number of epochs>".
_ckpt_folder = "_".join([CFG.MODEL.NAME, str(CFG.TRAIN.NUM_EPOCHS)])
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join("checkpoints", _ckpt_folder)
# train data
CFG.TRAIN.DATA = _dataloader_cfg(shuffle=True)

# ================= validate ================= #
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
# validating data
CFG.VAL.DATA = _dataloader_cfg(shuffle=False)

# ================= test ================= #
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
# evaluation
# test data
CFG.TEST.DATA = _dataloader_cfg(shuffle=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
from .launcher import launch_training, launch_runner | ||
from .runners import BaseRunner | ||
|
||
# Package version. The earlier assignment of "0.3.5" was immediately
# overwritten by this one, so only the effective value is kept.
__version__ = "0.3.6"

# Names exported by a star-import of this package.
__all__ = ["__version__", "launch_training", "launch_runner", "BaseRunner"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
import os | ||
import random | ||
|
||
import torch | ||
from torch.utils.data import Dataset | ||
|
||
from ...utils import load_pkl | ||
|
||
|
||
class M4ForecastingDataset(Dataset):
    """Dataset for the M4 forecasting benchmark.

    BasicTS tries its best to follow the commonly-used processing approaches of the M4 dataset,
    while also providing more flexible interfaces.
    M4 differs from general MTS datasets in the following aspects:
        - M4 is a univariate time series dataset whose series are not sampled in a synchronized manner.
          In the state-of-the-art M4 solution, N-BEATS [1], the authors first sample the ids of the
          time series and then randomly sample a window from each selected series [2].
        - Padding and masking are used to make training more flexible and robust.
        - There is no normalization in M4 dataset.
        - There is no validation dataset in M4 dataset.
        - The test data is the last sample of each time series.
        - The future sequence length is fixed for different subsets.

    Reference:
        [1] N-BEATS: Neural basis expansion analysis for interpretable time series forecasting
        [2] https://github.com/ServiceNow/N-BEATS/blob/master/common/sampler.py
    """

    def __init__(self, data_file_path: str, index_file_path: str, mask_file_path: str, mode: str) -> None:
        """Load the data, mask and index entries stored under ``mode``.

        Args:
            data_file_path (str): pickle file holding the padded series
            index_file_path (str): pickle file holding the sample indices of each series
            mask_file_path (str): pickle file holding the padding masks
            mode (str): "train" or "test"; selects the entry read from each file
        """

        super().__init__()
        assert mode in ["train", "test"], "error mode"
        self._check_if_file_exists(data_file_path, index_file_path, mask_file_path)
        # padded data and its padding mask: List[List]
        self.data = load_pkl(data_file_path)[mode]
        self.mask = load_pkl(mask_file_path)[mode]
        # train/test index of each time series: List[List]
        self.index = load_pkl(index_file_path)[mode]

    def _check_if_file_exists(self, data_file_path: str, index_file_path: str, mask_file_path: str):
        """Check that the data, index and mask files exist.

        Args:
            data_file_path (str): data file path
            index_file_path (str): index file path
            mask_file_path (str): mask file path

        Raises:
            FileNotFoundError: no data file
            FileNotFoundError: no index file
            FileNotFoundError: no mask file
        """

        # Checked in this order, so the first missing file is the one reported.
        required = (
            (data_file_path, "BasicTS can not find data file {0}"),
            (index_file_path, "BasicTS can not find index file {0}"),
            (mask_file_path, "BasicTS can not find mask file {0}"),
        )
        for path, message in required:
            if not os.path.isfile(path):
                raise FileNotFoundError(message.format(path))

    @staticmethod
    def _to_column(values) -> torch.Tensor:
        """Convert a sequence of L values into a float tensor of shape L x 1."""

        return torch.Tensor(values).unsqueeze(1).float()

    def __getitem__(self, ts_id: int) -> tuple:
        """Get a sample.

        Args:
            ts_id (int): the iteration index, i.e., the time series id (not the self.index).

        Returns:
            tuple: future_data, history_data, future_mask, history_mask, each a float tensor of shape L x 1.
        """

        candidates = list(self.index[ts_id])
        # randomly pick one of the sample windows of this series
        window = candidates[random.randint(0, len(candidates) - 1)]
        # window[0]:window[1] is the history span, window[1]:window[2] the future span
        begin, split, end = window[0], window[1], window[2]

        values = self.data[ts_id]
        history_data = self._to_column(values[begin:split])
        future_data = self._to_column(values[split:end])

        padding = self.mask[ts_id]
        history_mask = self._to_column(padding[begin:split])
        future_mask = self._to_column(padding[split:end])

        return future_data, history_data, future_mask, history_mask

    def __len__(self):
        """Dataset length (=number of time series)

        Returns:
            int: dataset length
        """

        return len(self.data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.