diff --git a/cli/conf/lsf-setup/multi_scale/finetune/default.yaml b/cli/conf/lsf-setup/multi_scale/finetune/default.yaml
index d3135e2..83f4320 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/default.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/default.yaml
@@ -33,6 +33,9 @@ trainer:
       mode: min
       save_top_k: 1 # Qz: Sometimes the 1st validation gets anomalous results. Discard that ckpt, and use the 2nd one.
       every_n_epochs: 1
+    - _target_: lightning.pytorch.callbacks.ModelCheckpoint
+      dirpath: ${hydra:runtime.output_dir}/checkpoints
+      save_weights_only: true
     - _target_: lightning.pytorch.callbacks.EarlyStopping # uni2ts.callbacks.earlystop.WarmupEarlyStopping
       monitor: val/PackedNLLLoss
       min_delta: 0.0
@@ -41,10 +44,6 @@ trainer:
       strict: false
      verbose: true
       # warmup_steps: 1
-    - _target_: lightning.pytorch.callbacks.ModelCheckpoint
-      dirpath: ${hydra:runtime.output_dir}/checkpoints
-      save_last: true
-      save_weights_only: true
   max_epochs: 1000
   enable_progress_bar: true
   accumulate_grad_batches: 1
diff --git a/cli/train.py b/cli/train.py
index 1ece54a..2270d1d 100644
--- a/cli/train.py
+++ b/cli/train.py
@@ -139,7 +139,8 @@ def main(cfg: DictConfig):
     trainer: L.Trainer = instantiate(cfg.trainer)
 
     # '=' in ckpt name make it cannot be directly loaded with hydra. Change it to '_'.
-    trainer.callbacks[-1].CHECKPOINT_EQUALS_CHAR = "_"
+    trainer.callbacks[1].CHECKPOINT_EQUALS_CHAR = "_"
+    trainer.callbacks[2].CHECKPOINT_EQUALS_CHAR = "_"
 
     train_dataset: Dataset = instantiate(cfg.data).load_dataset(
         model.train_transform_map
diff --git a/project/lsf-setup/multi_scale/eval/small/ettm1.sh b/project/lsf-setup/multi_scale/eval/small/ettm1.sh
index 135bb25..b5cb524 100644
--- a/project/lsf-setup/multi_scale/eval/small/ettm1.sh
+++ b/project/lsf-setup/multi_scale/eval/small/ettm1.sh
@@ -6,21 +6,24 @@ export CUDA_VISIBLE_DEVICES=0
 mode=S
 cp=conf/lsf-setup/multi_scale/eval
 exp_name=lsf
-cl=3000
+cl=4000
 model=moirai_lightning_ckpt
 
-cpp1='./outputs/multi_scale/finetune/moirai_1.1_R_small/lsf/full_ms_rope/ettm1/cl3000_pl96/checkpoints/epoch_2-step_1293.ckpt'
-cpp2='./outputs/multi_scale/finetune/moirai_1.1_R_small/lsf/full_ms_rope/ettm1/cl3000_pl192/checkpoints/epoch_1-step_858.ckpt'
-cpp3='./outputs/multi_scale/finetune/moirai_1.1_R_small/lsf/full_ms_rope/ettm1/cl3000_pl336/checkpoints/epoch_0-step_427.ckpt'
-cpp4='./outputs/multi_scale/finetune/moirai_1.1_R_small/lsf/full_ms_rope/ettm1/cl3000_pl720/checkpoints/epoch_0-step_422.ckpt'
+#cpp1='./outputs/lsf-setup/multi_scale/finetune/moirai_1.0_R_small/lsf/full/ettm1/S/cl4000_pl96/checkpoints/epoch_3-step_1668.ckpt'
+#cpp2='./outputs/lsf-setup/multi_scale/finetune/moirai_1.0_R_small/lsf/full/ettm2/S/cl3000_pl192/checkpoints/epoch_2-step_1287.ckpt'
+cpp3='./outputs/lsf-setup/multi_scale/finetune/moirai_1.0_R_small/lsf/full/ettm1/S/cl4000_pl336/checkpoints/epoch_3-step_1656.ckpt'
+cpp4='./outputs/lsf-setup/multi_scale/finetune/moirai_1.0_R_small/lsf/full/ettm1/S/cl4000_pl720/checkpoints/epoch_3-step_1632.ckpt'
 
 index=1
-for pl in 96 192 336 720; do
+for pl in 336 720; do # 96 192
   case $index in
-    1) cpp=$cpp1 ;;
-    2) cpp=$cpp2 ;;
-    3) cpp=$cpp3 ;;
-    4) cpp=$cpp4 ;;
+    1) cpp=$cpp3 ;;
+    2) cpp=$cpp4 ;;
+
+#    1) cpp=$cpp1 ;;
+#    2) cpp=$cpp2 ;;
+#    3) cpp=$cpp3 ;;
+#    4) cpp=$cpp4 ;;
   esac
 
   pretrained_model=$(echo $cpp | cut -d'/' -f6)
diff --git a/project/lsf-setup/multi_scale/eval/small/ettm2.sh b/project/lsf-setup/multi_scale/eval/small/ettm2.sh
index d39c4e7..903ff35 100644
--- a/project/lsf-setup/multi_scale/eval/small/ettm2.sh
+++ b/project/lsf-setup/multi_scale/eval/small/ettm2.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 export HYDRA_FULL_ERROR=1
-export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=3
 
 mode=S
 cp=conf/lsf-setup/multi_scale/eval
@@ -9,18 +9,22 @@ exp_name=lsf
 cl=3000
 model=moirai_lightning_ckpt
 
-cpp1='./outputs/multi_scale/finetune/moirai_1.1_R_small/lsf/full_ms_rope/ettm2/cl3000_pl96/checkpoints/epoch_5-step_2586.ckpt'
-cpp2='./outputs/multi_scale/finetune/moirai_1.1_R_small/lsf/full_ms_rope/ettm2/cl3000_pl192/checkpoints/epoch_2-step_1287.ckpt'
-cpp3='./outputs/multi_scale/finetune/moirai_1.1_R_small/lsf/full_ms_rope/ettm2/cl3000_pl336/checkpoints/epoch_0-step_427.ckpt'
-cpp4='./outputs/multi_scale/finetune/moirai_1.1_R_small/lsf/full_ms_rope/ettm2/cl3000_pl720/checkpoints/epoch_0-step_422.ckpt'
+#cpp1='./outputs/lsf-setup/multi_scale/finetune/moirai_1.0_R_small/lsf/full/ettm2/S/cl3000_pl96/checkpoints/epoch_5-step_2586.ckpt'
+cpp2='./outputs/lsf-setup/multi_scale/finetune/moirai_1.0_R_small/lsf/full/ettm2/S/cl3000_pl192/checkpoints/epoch_2-step_1287.ckpt'
+cpp3='./outputs/lsf-setup/multi_scale/finetune/moirai_1.0_R_small/lsf/full/ettm2/S/cl3000_pl336/checkpoints/epoch_3-step_1708.ckpt'
+cpp4='./outputs/lsf-setup/multi_scale/finetune/moirai_1.0_R_small/lsf/full/ettm2/S/cl4000_pl720/checkpoints/epoch_3-step_1688.ckpt'
 
 index=1
-for pl in 96 192 336 720; do
+for pl in 192 336 720; do # 96
   case $index in
-    1) cpp=$cpp1 ;;
-    2) cpp=$cpp2 ;;
-    3) cpp=$cpp3 ;;
-    4) cpp=$cpp4 ;;
+    1) cpp=$cpp2 ;;
+    2) cpp=$cpp3 ;;
+    3) cpp=$cpp4 ;;
+
+#    1) cpp=$cpp1 ;;
+#    2) cpp=$cpp2 ;;
+#    3) cpp=$cpp3 ;;
+#    4) cpp=$cpp4 ;;
   esac
 
   pretrained_model=$(echo $cpp | cut -d'/' -f6)
diff --git a/src/uni2ts/model/multi_scale_moirai/forecast.py b/src/uni2ts/model/multi_scale_moirai/forecast.py
index cd9b5aa..86252c9 100644
--- a/src/uni2ts/model/multi_scale_moirai/forecast.py
+++ b/src/uni2ts/model/multi_scale_moirai/forecast.py
@@ -128,15 +128,15 @@ def __init__(
 
     def post_init(self):
 
-        # for layer in self.module.encoder.layers:
-        #     # Check if the layer has an attribute named `self_attn` and if it is an instance of GroupedQueryAttention
-        #     if hasattr(layer, 'self_attn') and isinstance(layer.self_attn, GroupedQueryAttention):
-        #         # Call post_init() method of the GroupedQueryAttention object
-        #         layer.self_attn.init_multi_scale_modules(self.hparams.context_length, self.hparams.patch_size, self.num_new_scales, self.ds_factor)
-
-        for module in self.module.encoder.modules():
-            if isinstance(module, MultiScaleRotaryProjection):
-                module.post_init(self.token_idx_per_scale)
+        for layer in self.module.encoder.layers:
+            # Check if the layer has an attribute named `self_attn` and if it is an instance of GroupedQueryAttention
+            if hasattr(layer, 'self_attn') and isinstance(layer.self_attn, GroupedQueryAttention):
+                # Call post_init() method of the GroupedQueryAttention object
+                layer.self_attn.init_multi_scale_modules(self.hparams.context_length, self.hparams.patch_size, self.num_new_scales, self.ds_factor)
+
+        # for module in self.module.encoder.modules():
+        #     if isinstance(module, MultiScaleRotaryProjection):
+        #         module.post_init(self.token_idx_per_scale)
 
 
     def _get_token_idx_per_scale(self):
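Note on the `cli/train.py` hunk: replacing `trainer.callbacks[-1]` with hard-coded indices `[1]` and `[2]` assumes the two `ModelCheckpoint` callbacks stay at exactly those positions in the callback list built from `default.yaml`; reordering the YAML would silently patch the wrong callbacks. A position-independent sketch (not part of this diff, assuming the same `trainer` object from `cli/train.py`) could instead patch every `ModelCheckpoint` it finds:

```python
from lightning.pytorch.callbacks import ModelCheckpoint

# Sketch (not in this diff): set CHECKPOINT_EQUALS_CHAR on every ModelCheckpoint
# in trainer.callbacks rather than relying on fixed indices, so the callback
# order in the YAML config can change without breaking the patch.
for callback in trainer.callbacks:
    if isinstance(callback, ModelCheckpoint):
        # '=' in checkpoint filenames cannot be loaded directly with hydra
        # overrides, so replace it with '_' (same intent as the diff).
        callback.CHECKPOINT_EQUALS_CHAR = "_"
```

For the current config this behaves identically to the two index-based assignments while tolerating future callback reordering.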