Commit e0a8eed

Revise learnable time ID. Add two-stage multi-scale finetune.
zqiao11 committed Dec 3, 2024
1 parent f848942 commit e0a8eed
Showing 46 changed files with 1,941 additions and 103 deletions.
@@ -0,0 +1,7 @@
_target_: uni2ts.data.builder.simple.generate_finetune_builder
dataset: electricity
train_length: 18412
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
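The `???` entries are Hydra's mandatory-value markers: instantiating the builder fails unless they are filled, typically via command-line overrides. A minimal sketch of the resolution step, with hypothetical override values chosen only for illustration:

```python
# Sketch: filling the mandatory "???" fields the way CLI overrides would,
# then instantiating the builder. Values below are hypothetical examples;
# assumes the electricity dataset has been prepared for uni2ts beforehand.
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "_target_": "uni2ts.data.builder.simple.generate_finetune_builder",
    "dataset": "electricity",
    "train_length": 18412,
    "prediction_length": 96,   # hypothetical LSF horizon
    "context_length": 5000,    # hypothetical
    "patch_size": 64,          # presumably one of the model's patch_sizes
    "mode": "S",               # hypothetical mode value
})
builder = instantiate(cfg)    # calls generate_finetune_builder(...)
```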
@@ -0,0 +1,7 @@
_target_: uni2ts.data.builder.simple.generate_finetune_builder
dataset: ETTh1
train_length: 8640
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
@@ -0,0 +1,7 @@
_target_: uni2ts.data.builder.simple.generate_finetune_builder
dataset: ETTh2
train_length: 8640
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
@@ -0,0 +1,7 @@
_target_: uni2ts.data.builder.simple.generate_finetune_builder
dataset: ETTm1
train_length: 34560
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
@@ -0,0 +1,7 @@
_target_: uni2ts.data.builder.simple.generate_finetune_builder
dataset: ETTm2
train_length: 34560
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
@@ -0,0 +1,7 @@
_target_: uni2ts.data.builder.simple.generate_finetune_builder
dataset: weather
train_length: 36887
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
115 changes: 115 additions & 0 deletions cli/conf/lsf-setup/multi_scale/finetune_two_stage/default.yaml
@@ -0,0 +1,115 @@
hydra:
  run:
    dir: outputs/lsf-setup/multi_scale/finetune_two_stage/${hydra:runtime.choices.model}/${exp_name}/${model.finetune_pattern}/${hydra:runtime.choices.data}/${data.mode}/${run_name}
defaults:
  - model: ???
  - data: ???
  - val_data: null
  - _self_
exp_name: ???
run_name: ???
seed: 0
tf32: true
compile: false  # false, or a torch.compile mode: default, reduce-overhead, max-autotune
ckpt_path: null

trainer_warmup:
  _target_: lightning.Trainer
  accelerator: auto
  strategy: auto
  devices: auto
  num_nodes: 1
  precision: 32
  logger:
    _target_: lightning.pytorch.loggers.TensorBoardLogger
    save_dir: ${hydra:runtime.output_dir}
    name: logs
  callbacks:
    - _target_: lightning.pytorch.callbacks.LearningRateMonitor
      logging_interval: epoch
    - _target_: lightning.pytorch.callbacks.ModelCheckpoint
      dirpath: ${hydra:runtime.output_dir}/checkpoints_warmup
      monitor: val/PackedNLLLoss
      save_weights_only: true
      mode: min
      save_top_k: 1
      every_n_epochs: 1
    - _target_: lightning.pytorch.callbacks.EarlyStopping
      monitor: val/PackedNLLLoss
      min_delta: 0.0
      patience: 3
      mode: min
      strict: false
      verbose: true
  max_epochs: 30
  enable_progress_bar: true
  accumulate_grad_batches: 1
  gradient_clip_val: 1.0
  gradient_clip_algorithm: norm

trainer:
  _target_: lightning.Trainer
  accelerator: auto
  strategy: auto
  devices: auto
  num_nodes: 1
  precision: 32
  logger:
    _target_: lightning.pytorch.loggers.TensorBoardLogger
    save_dir: ${hydra:runtime.output_dir}
    name: logs
  callbacks:
    - _target_: lightning.pytorch.callbacks.LearningRateMonitor
      logging_interval: epoch
    - _target_: lightning.pytorch.callbacks.ModelCheckpoint
      dirpath: ${hydra:runtime.output_dir}/checkpoints
      monitor: val/PackedNLLLoss
      save_weights_only: true
      mode: min
      save_top_k: 1  # the first validation sometimes yields anomalous results; discard that checkpoint and use the next one
      every_n_epochs: 1
    - _target_: lightning.pytorch.callbacks.ModelCheckpoint
      dirpath: ${hydra:runtime.output_dir}/checkpoints
      save_weights_only: true
    - _target_: lightning.pytorch.callbacks.EarlyStopping  # alternative: uni2ts.callbacks.earlystop.WarmupEarlyStopping
      monitor: val/PackedNLLLoss
      min_delta: 0.0
      patience: 3  # kept small, since each epoch now covers many batches
      mode: min
      strict: false
      verbose: true
      # warmup_steps: 1
  max_epochs: 1000
  enable_progress_bar: true
  accumulate_grad_batches: 1
  gradient_clip_val: 1.0
  gradient_clip_algorithm: norm
train_dataloader:
  _target_: uni2ts.data.loader.DataLoader
  batch_size: 512  # a large batch size is feasible once sequence packing is disabled
  batch_size_factor: 2.0
  cycle: false  # false: iterate over all batches exactly once per epoch
  num_batches_per_epoch: null
  shuffle: true
  num_workers: 11
  pin_memory: true
  drop_last: false
  fill_last: false
  worker_init_fn: null
  prefetch_factor: 2
  persistent_workers: true
val_dataloader:
  _target_: uni2ts.data.loader.DataLoader
  batch_size: 32
  batch_size_factor: 2.0
  cycle: false
  num_batches_per_epoch: null
  shuffle: false
  num_workers: 11
  pin_memory: false
  drop_last: false
  fill_last: false
  worker_init_fn: null
  prefetch_factor: 2
  persistent_workers: true
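This default.yaml defines two `lightning.Trainer` sections: `trainer_warmup` (capped at 30 epochs, checkpointing to `checkpoints_warmup`) and `trainer` (the main finetuning stage, up to 1000 epochs with early stopping). The code that chains the stages is not part of this file; the sketch below is one plausible wiring, assuming a Hydra-composed `cfg` (so the `${hydra:...}` interpolations resolve) and an already-constructed model and dataloaders:

```python
# Hedged sketch of a two-stage run; not the repo's actual CLI code.
import torch
from hydra.utils import instantiate

def run_two_stage(cfg, model, train_loader, val_loader):
    # Stage 1: short warmup with its own checkpoint dir and early stopping.
    warmup_trainer = instantiate(cfg.trainer_warmup)
    warmup_trainer.fit(model, train_loader, val_loader)

    # Carry the best warmup weights into stage 2. The warmup checkpoints
    # are saved with save_weights_only, so load the state_dict rather
    # than resuming full trainer state.
    best = warmup_trainer.checkpoint_callback.best_model_path
    if best:
        ckpt = torch.load(best, map_location="cpu", weights_only=False)
        model.load_state_dict(ckpt["state_dict"])

    # Stage 2: full finetuning on the longer schedule (max_epochs: 1000).
    trainer = instantiate(cfg.trainer)
    trainer.fit(model, train_loader, val_loader)
```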
@@ -0,0 +1,51 @@
# Load a pretrained checkpoint from the Hugging Face Hub.
_target_: uni2ts.model.multi_scale_moirai.TwoStageMoiraiFinetune
module:
  _target_: uni2ts.model.multi_scale_moirai.MoiraiModule.from_pretrained
  pretrained_model_name_or_path: Salesforce/moirai-1.0-R-base
module_kwargs:
  _target_: builtins.dict
  distr_output:
    _target_: uni2ts.distribution.MixtureOutput
    components:
      - _target_: uni2ts.distribution.StudentTOutput
      - _target_: uni2ts.distribution.NormalFixedScaleOutput
      - _target_: uni2ts.distribution.NegativeBinomialOutput
      - _target_: uni2ts.distribution.LogNormalOutput
  d_model: 768
  num_layers: 12
  patch_sizes: ${as_tuple:[8, 16, 32, 64, 128]}
  max_seq_len: 512
  attn_dropout_p: 0.0
  dropout_p: 0.0
  scaling: true
min_patches: 2
min_mask_ratio: 0.15
max_mask_ratio: 0.5
max_dim: 128
loss_func:
  _target_: uni2ts.loss.packed.PackedNLLLoss
val_metric:
  - _target_: uni2ts.loss.packed.PackedMSELoss
  - _target_: uni2ts.loss.packed.PackedNRMSELoss
    normalize: absolute_target_squared
lr: 5e-7  # on the ETT datasets, 1e-6 or 5e-7 converges within 1-2 epochs; 1e-7 takes tens of epochs
weight_decay: 1e-1
beta1: 0.9
beta2: 0.98
num_training_steps: null
num_warmup_steps: 0
patch_size: null
context_length: null
prediction_length: null
finetune_pattern: full
num_new_scales: 3
ds_factor: 2

use_lora: true
lora_kwargs:
  _target_: builtins.dict
  r: 16
  target_modules: ["q_proj", "k_proj", "v_proj"]
  lora_alpha: 32
  lora_dropout: 0.05
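`lora_kwargs` mirrors the constructor arguments of `peft.LoraConfig` (`r`, `target_modules`, `lora_alpha`, `lora_dropout`). How `TwoStageMoiraiFinetune` consumes the flag is not shown in this diff; a minimal sketch under the assumption that the loaded module is wrapped with PEFT when `use_lora` is true:

```python
# Hedged sketch: applying the lora_kwargs above via PEFT.
# Assumes `module` is the loaded MoiraiModule and that its attention
# projections are named q_proj/k_proj/v_proj, as target_modules implies.
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj"],
    lora_alpha=32,
    lora_dropout=0.05,
)
module = get_peft_model(module, lora_config)
module.print_trainable_parameters()  # only the LoRA adapters remain trainable
```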
@@ -0,0 +1,51 @@
# load a pretrained checkpoint from huggingface hub
_target_: uni2ts.model.multi_scale_moirai.TwoStageMoiraiFinetune
module:
  _target_: uni2ts.model.multi_scale_moirai.MoiraiModule.from_pretrained
  pretrained_model_name_or_path: Salesforce/moirai-1.0-R-small
module_kwargs:
  _target_: builtins.dict
  distr_output:
    _target_: uni2ts.distribution.MixtureOutput
    components:
      - _target_: uni2ts.distribution.StudentTOutput
      - _target_: uni2ts.distribution.NormalFixedScaleOutput
      - _target_: uni2ts.distribution.NegativeBinomialOutput
      - _target_: uni2ts.distribution.LogNormalOutput
  d_model: 384
  num_layers: 6
  patch_sizes: ${as_tuple:[8, 16, 32, 64, 128]}
  max_seq_len: 512
  attn_dropout_p: 0.0
  dropout_p: 0.0
  scaling: true
min_patches: 2
min_mask_ratio: 0.15
max_mask_ratio: 0.5
max_dim: 128
loss_func:
  _target_: uni2ts.loss.packed.PackedNLLLoss
val_metric:
  - _target_: uni2ts.loss.packed.PackedMSELoss
  - _target_: uni2ts.loss.packed.PackedNRMSELoss
    normalize: absolute_target_squared
lr: 5e-7
weight_decay: 1e-1
beta1: 0.9
beta2: 0.98
num_training_steps: null
num_warmup_steps: 0
patch_size: null
context_length: null
prediction_length: null
finetune_pattern: full
num_new_scales: 3
ds_factor: 2

use_lora: false
lora_kwargs:
  _target_: builtins.dict
  r: 16
  target_modules: ["q_proj", "k_proj", "v_proj"]
  lora_alpha: 32
  lora_dropout: 0.05
@@ -0,0 +1,51 @@
# Load a pretrained checkpoint from the Hugging Face Hub.
_target_: uni2ts.model.multi_scale_moirai.MoiraiFinetune
module:
  _target_: uni2ts.model.multi_scale_moirai.MoiraiModule.from_pretrained
  pretrained_model_name_or_path: Salesforce/moirai-1.1-R-small
module_kwargs:
  _target_: builtins.dict
  distr_output:
    _target_: uni2ts.distribution.MixtureOutput
    components:
      - _target_: uni2ts.distribution.StudentTOutput
      - _target_: uni2ts.distribution.NormalFixedScaleOutput
      - _target_: uni2ts.distribution.NegativeBinomialOutput
      - _target_: uni2ts.distribution.LogNormalOutput
  d_model: 384
  num_layers: 6
  patch_sizes: ${as_tuple:[8, 16, 32, 64, 128]}
  max_seq_len: 512
  attn_dropout_p: 0.0
  dropout_p: 0.0
  scaling: true
min_patches: 2
min_mask_ratio: 0.15
max_mask_ratio: 0.5
max_dim: 128
loss_func:
  _target_: uni2ts.loss.packed.PackedNLLLoss
val_metric:
  - _target_: uni2ts.loss.packed.PackedMSELoss
  - _target_: uni2ts.loss.packed.PackedNRMSELoss
    normalize: absolute_target_squared
lr: 5e-7  # on the ETT datasets, 1e-6 or 5e-7 converges within 1-2 epochs; 1e-7 takes tens of epochs
weight_decay: 1e-1
beta1: 0.9
beta2: 0.98
num_training_steps: null
num_warmup_steps: 0
patch_size: null
context_length: null
prediction_length: null
finetune_pattern: full
num_new_scales: 3
ds_factor: 2

use_lora: false
lora_kwargs:
  _target_: builtins.dict
  r: 16
  target_modules: ["q_proj", "k_proj", "v_proj"]
  lora_alpha: 32
  lora_dropout: 0.05
@@ -0,0 +1,8 @@
_target_: uni2ts.data.builder.simple.generate_eval_builder
dataset: electricity_eval
offset: 18412 # Same as _lsf_dataset.py
eval_length: 2630 # Same as _lsf_dataset.py, test_length=5260
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
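These offsets line up with the standard LSF split for electricity (series length 26,304): `offset` equals the finetune builder's `train_length`, so evaluation starts where the training data ends, and `eval_length` is the 10% validation slice that precedes the final 20% test region (the `test_length` in the comment). The arithmetic, reproduced in Python:

```python
# Split arithmetic for electricity; 26304 is the standard LSF series length.
series_len = 26304
assert int(series_len * 0.7) == 18412  # train_length, and the eval offset
assert int(series_len * 0.1) == 2630   # eval_length: the validation slice
assert int(series_len * 0.2) == 5260   # test_length, per the inline comment
```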
@@ -0,0 +1,8 @@
_target_: uni2ts.data.builder.simple.generate_eval_builder
dataset: ETTh1_eval
offset: 8640
eval_length: 2880
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
@@ -0,0 +1,8 @@
_target_: uni2ts.data.builder.simple.generate_eval_builder
dataset: ETTh2_eval
offset: 8640 # Same as _lsf_dataset.py
eval_length: 2880 # Same as _lsf_dataset.py
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
@@ -0,0 +1,8 @@
_target_: uni2ts.data.builder.simple.generate_eval_builder
dataset: ETTm1_eval
offset: 34560 # Same as _lsf_dataset.py
eval_length: 11520 # Same as _lsf_dataset.py
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
@@ -0,0 +1,8 @@
_target_: uni2ts.data.builder.simple.generate_eval_builder
dataset: ETTm2_eval
offset: 34560 # Same as _lsf_dataset.py
eval_length: 11520 # Same as _lsf_dataset.py
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???
@@ -0,0 +1,8 @@
_target_: uni2ts.data.builder.simple.generate_eval_builder
dataset: weather_eval
offset: 36887 # Same as _lsf_dataset.py
eval_length: 5269 # Same as _lsf_dataset.py; test_length=10539
prediction_length: ???
context_length: ???
patch_size: ???
mode: ???