diff --git a/cli/conf/lsf-setup/lsf/finetune/data/electricity.yaml b/cli/conf/lsf-setup/lsf/finetune/data/electricity.yaml
index 70ca032..73e7350 100644
--- a/cli/conf/lsf-setup/lsf/finetune/data/electricity.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/data/electricity.yaml
@@ -4,3 +4,4 @@ train_length: 18412
 prediction_length: ???
 context_length: ???
 patch_size: ???
+mode: ???
diff --git a/cli/conf/lsf-setup/lsf/finetune/data/etth1.yaml b/cli/conf/lsf-setup/lsf/finetune/data/etth1.yaml
index f7235c4..bd54733 100644
--- a/cli/conf/lsf-setup/lsf/finetune/data/etth1.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/data/etth1.yaml
@@ -3,4 +3,5 @@ dataset: ETTh1
 train_length: 8640
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/lsf/finetune/data/etth2.yaml b/cli/conf/lsf-setup/lsf/finetune/data/etth2.yaml
index 1dd47f7..6e3eede 100644
--- a/cli/conf/lsf-setup/lsf/finetune/data/etth2.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/data/etth2.yaml
@@ -3,4 +3,5 @@ dataset: ETTh2
 train_length: 8640
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/lsf/finetune/data/ettm1.yaml b/cli/conf/lsf-setup/lsf/finetune/data/ettm1.yaml
index dbde79e..2f84768 100644
--- a/cli/conf/lsf-setup/lsf/finetune/data/ettm1.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/data/ettm1.yaml
@@ -3,4 +3,5 @@ dataset: ETTm1
 train_length: 34560
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/lsf/finetune/data/ettm2.yaml b/cli/conf/lsf-setup/lsf/finetune/data/ettm2.yaml
index 5c402f1..1d8e32b 100644
--- a/cli/conf/lsf-setup/lsf/finetune/data/ettm2.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/data/ettm2.yaml
@@ -4,3 +4,4 @@ train_length: 34560
 prediction_length: ???
 context_length: ???
 patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/lsf/finetune/data/weather.yaml b/cli/conf/lsf-setup/lsf/finetune/data/weather.yaml
index a6fa5fd..86b5bcc 100644
--- a/cli/conf/lsf-setup/lsf/finetune/data/weather.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/data/weather.yaml
@@ -4,3 +4,4 @@ train_length: 36887
 prediction_length: ???
 context_length: ???
 patch_size: ???
+mode: ???
diff --git a/cli/conf/lsf-setup/lsf/finetune/default.yaml b/cli/conf/lsf-setup/lsf/finetune/default.yaml
index 67ac76a..1ea168e 100644
--- a/cli/conf/lsf-setup/lsf/finetune/default.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/default.yaml
@@ -1,6 +1,6 @@
 hydra:
   run:
-    dir: outputs/lsf-setup/lsf/finetune/${hydra:runtime.choices.model}/${exp_name}/${model.finetune_pattern}/${hydra:runtime.choices.data}/${run_name}
+    dir: outputs/lsf-setup/lsf/finetune/${hydra:runtime.choices.model}/${exp_name}/${model.finetune_pattern}/${hydra:runtime.choices.data}/${data.mode}/${run_name}
 defaults:
   - model: ???
   - data: ???
diff --git a/cli/conf/lsf-setup/lsf/finetune/val_data/electricity.yaml b/cli/conf/lsf-setup/lsf/finetune/val_data/electricity.yaml
index 74981e0..a20c574 100644
--- a/cli/conf/lsf-setup/lsf/finetune/val_data/electricity.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/val_data/electricity.yaml
@@ -4,4 +4,5 @@ offset: 18412  # Same as _lsf_dataset.py
 eval_length: 2630  # Same as _lsf_dataset.py, test_length=5260
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/lsf/finetune/val_data/etth1.yaml b/cli/conf/lsf-setup/lsf/finetune/val_data/etth1.yaml
index a409cde..2a379ab 100644
--- a/cli/conf/lsf-setup/lsf/finetune/val_data/etth1.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/val_data/etth1.yaml
@@ -4,4 +4,5 @@ offset: 8640
 eval_length: 2880
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/lsf/finetune/val_data/etth2.yaml b/cli/conf/lsf-setup/lsf/finetune/val_data/etth2.yaml
index 31ca968..90e8296 100644
--- a/cli/conf/lsf-setup/lsf/finetune/val_data/etth2.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/val_data/etth2.yaml
@@ -4,4 +4,5 @@ offset: 8640  # Same as _lsf_dataset.py
 eval_length: 2880  # Same as _lsf_dataset.py
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/lsf/finetune/val_data/ettm1.yaml b/cli/conf/lsf-setup/lsf/finetune/val_data/ettm1.yaml
index 3f0244c..3cdf94b 100644
--- a/cli/conf/lsf-setup/lsf/finetune/val_data/ettm1.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/val_data/ettm1.yaml
@@ -4,4 +4,5 @@ offset: 34560  # Same as _lsf_dataset.py
 eval_length: 11520  # Same as _lsf_dataset.py
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/lsf/finetune/val_data/ettm2.yaml b/cli/conf/lsf-setup/lsf/finetune/val_data/ettm2.yaml
index 0939493..74ae64c 100644
--- a/cli/conf/lsf-setup/lsf/finetune/val_data/ettm2.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/val_data/ettm2.yaml
@@ -4,4 +4,5 @@ offset: 34560  # Same as _lsf_dataset.py
 eval_length: 11520  # Same as _lsf_dataset.py
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/lsf/finetune/val_data/weather.yaml b/cli/conf/lsf-setup/lsf/finetune/val_data/weather.yaml
index c2a23de..1d4e331 100644
--- a/cli/conf/lsf-setup/lsf/finetune/val_data/weather.yaml
+++ b/cli/conf/lsf-setup/lsf/finetune/val_data/weather.yaml
@@ -4,4 +4,5 @@ offset: 36887  # Same as _lsf_dataset.py
 eval_length: 5269  # Same as _lsf_dataset.py; test_length=10539
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/eval/model/moirai_1.0_R_small.yaml b/cli/conf/lsf-setup/multi_scale/eval/model/moirai_1.0_R_small.yaml
index 338656c..b3bb984 100644
--- a/cli/conf/lsf-setup/multi_scale/eval/model/moirai_1.0_R_small.yaml
+++ b/cli/conf/lsf-setup/multi_scale/eval/model/moirai_1.0_R_small.yaml
@@ -5,4 +5,4 @@ module:
 num_samples: 100
 patch_size: ???
 context_length: ???
-num_new_scales: 2
\ No newline at end of file
+num_new_scales: 3
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/eval/model/moirai_1.1_R_small.yaml b/cli/conf/lsf-setup/multi_scale/eval/model/moirai_1.1_R_small.yaml
index 845a22b..df35ec7 100644
--- a/cli/conf/lsf-setup/multi_scale/eval/model/moirai_1.1_R_small.yaml
+++ b/cli/conf/lsf-setup/multi_scale/eval/model/moirai_1.1_R_small.yaml
@@ -5,4 +5,4 @@ module:
 num_samples: 100
 patch_size: ???
 context_length: ???
-num_new_scales: 2
\ No newline at end of file
+num_new_scales: 3
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/data/electricity.yaml b/cli/conf/lsf-setup/multi_scale/finetune/data/electricity.yaml
index 70ca032..73e7350 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/data/electricity.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/data/electricity.yaml
@@ -4,3 +4,4 @@ train_length: 18412
 prediction_length: ???
 context_length: ???
 patch_size: ???
+mode: ???
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/data/etth1.yaml b/cli/conf/lsf-setup/multi_scale/finetune/data/etth1.yaml
index f7235c4..bd54733 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/data/etth1.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/data/etth1.yaml
@@ -3,4 +3,5 @@ dataset: ETTh1
 train_length: 8640
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/data/etth2.yaml b/cli/conf/lsf-setup/multi_scale/finetune/data/etth2.yaml
index 1dd47f7..6e3eede 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/data/etth2.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/data/etth2.yaml
@@ -3,4 +3,5 @@ dataset: ETTh2
 train_length: 8640
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/data/ettm1.yaml b/cli/conf/lsf-setup/multi_scale/finetune/data/ettm1.yaml
index dbde79e..2f84768 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/data/ettm1.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/data/ettm1.yaml
@@ -3,4 +3,5 @@ dataset: ETTm1
 train_length: 34560
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/data/ettm2.yaml b/cli/conf/lsf-setup/multi_scale/finetune/data/ettm2.yaml
index 5c402f1..dee2561 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/data/ettm2.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/data/ettm2.yaml
@@ -4,3 +4,4 @@ train_length: 34560
 prediction_length: ???
 context_length: ???
 patch_size: ???
+mode: ???
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/data/weather.yaml b/cli/conf/lsf-setup/multi_scale/finetune/data/weather.yaml
index a6fa5fd..86b5bcc 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/data/weather.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/data/weather.yaml
@@ -4,3 +4,4 @@ train_length: 36887
 prediction_length: ???
 context_length: ???
 patch_size: ???
+mode: ???
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/default.yaml b/cli/conf/lsf-setup/multi_scale/finetune/default.yaml
index 7239871..d3135e2 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/default.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/default.yaml
@@ -1,6 +1,6 @@
 hydra:
   run:
-    dir: outputs/lsf-setup/multi_scale/finetune/${hydra:runtime.choices.model}/${exp_name}/${model.finetune_pattern}/${hydra:runtime.choices.data}/${run_name}
+    dir: outputs/lsf-setup/multi_scale/finetune/${hydra:runtime.choices.model}/${exp_name}/${model.finetune_pattern}/${hydra:runtime.choices.data}/${data.mode}/${run_name}
 defaults:
   - model: ???
   - data: ???
@@ -40,7 +40,11 @@ trainer:
         mode: min
         strict: false
         verbose: true
-#      warmup_steps: 1
+      # warmup_steps: 1
+    - _target_: lightning.pytorch.callbacks.ModelCheckpoint
+      dirpath: ${hydra:runtime.output_dir}/checkpoints
+      save_last: true
+      save_weights_only: true
   max_epochs: 1000
   enable_progress_bar: true
   accumulate_grad_batches: 1
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/val_data/electricity.yaml b/cli/conf/lsf-setup/multi_scale/finetune/val_data/electricity.yaml
index 74981e0..a20c574 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/val_data/electricity.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/val_data/electricity.yaml
@@ -4,4 +4,5 @@ offset: 18412  # Same as _lsf_dataset.py
 eval_length: 2630  # Same as _lsf_dataset.py, test_length=5260
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/val_data/etth1.yaml b/cli/conf/lsf-setup/multi_scale/finetune/val_data/etth1.yaml
index a409cde..2a379ab 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/val_data/etth1.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/val_data/etth1.yaml
@@ -4,4 +4,5 @@ offset: 8640
 eval_length: 2880
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/val_data/etth2.yaml b/cli/conf/lsf-setup/multi_scale/finetune/val_data/etth2.yaml
index 31ca968..90e8296 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/val_data/etth2.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/val_data/etth2.yaml
@@ -4,4 +4,5 @@ offset: 8640  # Same as _lsf_dataset.py
 eval_length: 2880  # Same as _lsf_dataset.py
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/val_data/ettm1.yaml b/cli/conf/lsf-setup/multi_scale/finetune/val_data/ettm1.yaml
index 3f0244c..3cdf94b 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/val_data/ettm1.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/val_data/ettm1.yaml
@@ -4,4 +4,5 @@ offset: 34560  # Same as _lsf_dataset.py
 eval_length: 11520  # Same as _lsf_dataset.py
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/val_data/ettm2.yaml b/cli/conf/lsf-setup/multi_scale/finetune/val_data/ettm2.yaml
index 0939493..74ae64c 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/val_data/ettm2.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/val_data/ettm2.yaml
@@ -4,4 +4,5 @@ offset: 34560  # Same as _lsf_dataset.py
 eval_length: 11520  # Same as _lsf_dataset.py
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
diff --git a/cli/conf/lsf-setup/multi_scale/finetune/val_data/weather.yaml b/cli/conf/lsf-setup/multi_scale/finetune/val_data/weather.yaml
index c2a23de..1d4e331 100644
--- a/cli/conf/lsf-setup/multi_scale/finetune/val_data/weather.yaml
+++ b/cli/conf/lsf-setup/multi_scale/finetune/val_data/weather.yaml
@@ -4,4 +4,5 @@ offset: 36887  # Same as _lsf_dataset.py
 eval_length: 5269  # Same as _lsf_dataset.py; test_length=10539
 prediction_length: ???
 context_length: ???
-patch_size: ???
\ No newline at end of file
+patch_size: ???
+mode: ???
\ No newline at end of file
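Note that every one of these data and val_data configs leaves the new mode key as ???, OmegaConf's mandatory-value marker, so the launch scripts below all have to pass data.mode and val_data.mode explicitly. A minimal sketch of that behavior follows; it is illustrative only (the dict values are hypothetical) and is not part of the patch.

from omegaconf import OmegaConf

# Sketch (not part of the patch): '???' marks a mandatory value in OmegaConf,
# the config library underlying Hydra. It stays missing until overridden.
cfg = OmegaConf.create({"dataset": "ETTh1", "mode": "???"})
print(OmegaConf.is_missing(cfg, "mode"))  # True; reading cfg.mode now would raise
cfg.mode = "M"                            # what a `data.mode=M` CLI override supplies
print(cfg.mode)                           # M
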
diff --git a/project/lsf-setup/build_lsf_ft_datasets.sh b/project/lsf-setup/build_lsf_ft_datasets.sh
index f35b537..d88b9c2 100644
--- a/project/lsf-setup/build_lsf_ft_datasets.sh
+++ b/project/lsf-setup/build_lsf_ft_datasets.sh
@@ -3,7 +3,7 @@ set -a
 source .env
 set +a
 
-ds_type="wide"  # "wide_multivariate"
+ds_type="wide_multivariate"  # or "wide"
 path_prefix=$LSF_PATH
 
 for data in ETTh1 ETTh2; do
diff --git a/project/lsf-setup/lsf/finetune/small/electricity.sh b/project/lsf-setup/lsf/finetune/small/electricity.sh
index 9ac2970..d57c1b6 100644
--- a/project/lsf-setup/lsf/finetune/small/electricity.sh
+++ b/project/lsf-setup/lsf/finetune/small/electricity.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=electricity
 cl=5000
 ps=64
+mode=S
 
 ft_pattern=full
 
@@ -25,9 +26,11 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
     val_data.prediction_length=$pl \
+    val_data.mode=${mode} \
     model.lr=5e-6
 done
\ No newline at end of file
diff --git a/project/lsf-setup/lsf/finetune/small/etth1.sh b/project/lsf-setup/lsf/finetune/small/etth1.sh
index 7798c69..626c843 100644
--- a/project/lsf-setup/lsf/finetune/small/etth1.sh
+++ b/project/lsf-setup/lsf/finetune/small/etth1.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=etth1
 cl=5000
 ps=64
+mode=M
 
 ft_pattern=full
 
@@ -25,8 +26,10 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
-    val_data.prediction_length=$pl
+    val_data.prediction_length=$pl \
+    val_data.mode=${mode}
 done
\ No newline at end of file
diff --git a/project/lsf-setup/lsf/finetune/small/etth2.sh b/project/lsf-setup/lsf/finetune/small/etth2.sh
index 0927993..780ba83 100644
--- a/project/lsf-setup/lsf/finetune/small/etth2.sh
+++ b/project/lsf-setup/lsf/finetune/small/etth2.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=etth2
 cl=3000
 ps=64
+mode=M
 
 ft_pattern=full
 
@@ -25,8 +26,10 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
-    val_data.prediction_length=$pl
+    val_data.prediction_length=$pl \
+    val_data.mode=${mode}
 done
\ No newline at end of file
diff --git a/project/lsf-setup/lsf/finetune/small/ettm1.sh b/project/lsf-setup/lsf/finetune/small/ettm1.sh
index 515daa7..061a5e1 100644
--- a/project/lsf-setup/lsf/finetune/small/ettm1.sh
+++ b/project/lsf-setup/lsf/finetune/small/ettm1.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=ettm1
 cl=4000
 ps=128
+mode=S
 
 ft_pattern=full
 
@@ -25,8 +26,10 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
-    val_data.prediction_length=$pl
+    val_data.prediction_length=$pl \
+    val_data.mode=${mode}
 done
\ No newline at end of file
diff --git a/project/lsf-setup/lsf/finetune/small/ettm2.sh b/project/lsf-setup/lsf/finetune/small/ettm2.sh
index 2a9cce4..2dc2415 100644
--- a/project/lsf-setup/lsf/finetune/small/ettm2.sh
+++ b/project/lsf-setup/lsf/finetune/small/ettm2.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=ettm2
 cl=3000
 ps=64
+mode=S
 
 ft_pattern=full
 
@@ -25,8 +26,10 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
-    val_data.prediction_length=$pl
+    val_data.prediction_length=$pl \
+    val_data.mode=${mode}
 done
\ No newline at end of file
diff --git a/project/lsf-setup/lsf/finetune/small/weather.sh b/project/lsf-setup/lsf/finetune/small/weather.sh
index 98939b0..9db8ddc 100644
--- a/project/lsf-setup/lsf/finetune/small/weather.sh
+++ b/project/lsf-setup/lsf/finetune/small/weather.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=weather
 cl=2000
 ps=128
+mode=M
 
 ft_pattern=full
 
@@ -25,8 +26,10 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
-    val_data.prediction_length=$pl
+    val_data.prediction_length=$pl \
+    val_data.mode=${mode}
 done
\ No newline at end of file
diff --git a/project/lsf-setup/multi_scale/finetune/small/electricity.sh b/project/lsf-setup/multi_scale/finetune/small/electricity.sh
index 2b1af2b..ac8d4fc 100644
--- a/project/lsf-setup/multi_scale/finetune/small/electricity.sh
+++ b/project/lsf-setup/multi_scale/finetune/small/electricity.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=electricity
 cl=5000
 ps=64
+mode=S
 
 ft_pattern=full
 
@@ -25,9 +26,11 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
     val_data.prediction_length=$pl \
+    val_data.mode=${mode} \
     model.lr=5e-6
 done
\ No newline at end of file
diff --git a/project/lsf-setup/multi_scale/finetune/small/etth1.sh b/project/lsf-setup/multi_scale/finetune/small/etth1.sh
index 3e724fa..cedd949 100644
--- a/project/lsf-setup/multi_scale/finetune/small/etth1.sh
+++ b/project/lsf-setup/multi_scale/finetune/small/etth1.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=etth1
 cl=5000
 ps=64
+mode=M
 
 ft_pattern=full
 
@@ -25,8 +26,10 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
-    val_data.prediction_length=$pl
+    val_data.prediction_length=$pl \
+    val_data.mode=${mode}
 done
\ No newline at end of file
diff --git a/project/lsf-setup/multi_scale/finetune/small/etth2.sh b/project/lsf-setup/multi_scale/finetune/small/etth2.sh
index 0bfad04..c2baada 100644
--- a/project/lsf-setup/multi_scale/finetune/small/etth2.sh
+++ b/project/lsf-setup/multi_scale/finetune/small/etth2.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=etth2
 cl=3000
 ps=64
+mode=M
 
 ft_pattern=full
 
@@ -25,8 +26,10 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
-    val_data.prediction_length=$pl
+    val_data.prediction_length=$pl \
+    val_data.mode=${mode}
 done
\ No newline at end of file
diff --git a/project/lsf-setup/multi_scale/finetune/small/ettm1.sh b/project/lsf-setup/multi_scale/finetune/small/ettm1.sh
index 99a191a..30204fc 100644
--- a/project/lsf-setup/multi_scale/finetune/small/ettm1.sh
+++ b/project/lsf-setup/multi_scale/finetune/small/ettm1.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=ettm1
 cl=4000
 ps=128
+mode=S
 
 ft_pattern=full
 
@@ -25,8 +26,10 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
-    val_data.prediction_length=$pl
+    val_data.prediction_length=$pl \
+    val_data.mode=${mode}
 done
\ No newline at end of file
diff --git a/project/lsf-setup/multi_scale/finetune/small/ettm2.sh b/project/lsf-setup/multi_scale/finetune/small/ettm2.sh
index 57b2713..66ff9cb 100644
--- a/project/lsf-setup/multi_scale/finetune/small/ettm2.sh
+++ b/project/lsf-setup/multi_scale/finetune/small/ettm2.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-export HYDRA_FULL_ERROR=1; export CUDA_VISIBLE_DEVICES=0;
+export HYDRA_FULL_ERROR=1; export CUDA_VISIBLE_DEVICES=3;
 
 model=moirai_1.0_R_small
 cp=conf/lsf-setup/multi_scale/finetune
@@ -8,6 +8,7 @@ exp_name=lsf
 data=ettm2
 cl=3000
 ps=64
+mode=S
 
 ft_pattern=full
 
@@ -25,8 +26,10 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
-    val_data.prediction_length=$pl
+    val_data.prediction_length=$pl \
+    val_data.mode=${mode}
 done
\ No newline at end of file
diff --git a/project/lsf-setup/multi_scale/finetune/small/weather.sh b/project/lsf-setup/multi_scale/finetune/small/weather.sh
index fc8ae55..95e2cc7 100644
--- a/project/lsf-setup/multi_scale/finetune/small/weather.sh
+++ b/project/lsf-setup/multi_scale/finetune/small/weather.sh
@@ -8,6 +8,7 @@ exp_name=lsf
 data=weather
 cl=2000
 ps=128
+mode=M
 
 ft_pattern=full
 
@@ -25,8 +26,10 @@ for pl in 96 192 336 720; do
     data.patch_size=${ps} \
     data.context_length=$cl \
     data.prediction_length=$pl \
+    data.mode=${mode} \
     val_data=${data} \
     val_data.patch_size=${ps} \
     val_data.context_length=$cl \
-    val_data.prediction_length=$pl
+    val_data.prediction_length=$pl \
+    val_data.mode=${mode}
 done
\ No newline at end of file
diff --git a/src/uni2ts/data/builder/simple.py b/src/uni2ts/data/builder/simple.py
index 832f880..5a89a32 100644
--- a/src/uni2ts/data/builder/simple.py
+++ b/src/uni2ts/data/builder/simple.py
@@ -274,6 +274,7 @@ class SimpleFinetuneDatasetBuilder(DatasetBuilder):
     prediction_length: Optional[int]
     context_length: Optional[int]
     patch_size: Optional[int]
+    mode: Optional[str] = 'S'
     storage_path: Path = env.CUSTOM_DATA_PATH
     mean = None
     std = None
@@ -331,16 +332,22 @@ def build_dataset(
             example_gen_func, features=features
         )
         hf_dataset.info.dataset_name = self.dataset
-        hf_dataset.save_to_disk(self.storage_path / 'lsf' / self.dataset)
+        hf_dataset.save_to_disk(self.storage_path / 'lsf' / f'{dataset_type}' / self.dataset)
 
     def load_dataset(
         self, transform_map: dict[str, Callable[..., Transformation]]
     ) -> Dataset:
+
+        if self.mode == 'S':
+            dataset_type = 'wide'
+        elif self.mode == 'M':
+            dataset_type = 'wide_multivariate'
+
         return FinetuneDataset(
             self.windows,
             HuggingFaceDatasetIndexer(
                 datasets.load_from_disk(
-                    str(self.storage_path / 'lsf' / self.dataset),
+                    str(self.storage_path / 'lsf' / f'{dataset_type}' / self.dataset),
                 )
             ),
             transform=transform_map[self.dataset](
@@ -367,6 +374,7 @@ class SimpleEvalDatasetBuilder(DatasetBuilder):
     prediction_length: Optional[int]
     context_length: Optional[int]
     patch_size: Optional[int]
+    mode: Optional[str] = 'S'
     storage_path: Path = env.CUSTOM_DATA_PATH
 
     def __post_init__(self):
@@ -402,16 +410,21 @@ def build_dataset(
             example_gen_func, features=features
        )
         hf_dataset.info.dataset_name = self.dataset
-        hf_dataset.save_to_disk(self.storage_path / 'lsf' / self.dataset)
+        hf_dataset.save_to_disk(self.storage_path / 'lsf' / f'{dataset_type}' / self.dataset)
 
     def load_dataset(
         self, transform_map: dict[str, Callable[..., Transformation]]
     ) -> Dataset:
+        if self.mode == 'S':
+            dataset_type = 'wide'
+        elif self.mode == 'M':
+            dataset_type = 'wide_multivariate'
+
         return EvalDataset(
             self.windows,
             HuggingFaceDatasetIndexer(
                 datasets.load_from_disk(
-                    str(self.storage_path / 'lsf' / self.dataset),
+                    str(self.storage_path / 'lsf' / f'{dataset_type}' / self.dataset),
                 )
             ),
             transform=transform_map[self.dataset](
@@ -430,6 +443,7 @@ def generate_finetune_builder(
     prediction_length: int,
     context_length: int,
     patch_size: int,
+    mode: str,
     storage_path: Path = env.CUSTOM_DATA_PATH,
 ) -> SimpleFinetuneDatasetBuilder:
     """
@@ -442,6 +456,7 @@
         prediction_length=prediction_length,
         context_length=context_length,
         patch_size=patch_size,
+        mode=mode,
         storage_path=storage_path,
     )
@@ -453,6 +468,7 @@ def generate_eval_builder(
     prediction_length: int,
     context_length: int,
     patch_size: int,
+    mode: str,
     storage_path: Path = env.CUSTOM_DATA_PATH,
 ) -> SimpleEvalDatasetBuilder:
     """
@@ -505,6 +521,7 @@
         prediction_length=prediction_length,
         context_length=context_length,
         patch_size=patch_size,
+        mode=mode,
         storage_path=storage_path,
     )
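The simple.py changes key the on-disk layout off the new mode field: 'S' reads the 'wide' build of a dataset and 'M' the 'wide_multivariate' build produced by build_lsf_ft_datasets.sh. Below is a minimal sketch of that mapping factored into one helper; the helper name dataset_type_for and the ValueError guard are additions for illustration (the patch's bare if/elif would leave dataset_type unset for any other mode, and these hunks do not show where build_dataset gets its dataset_type from). It is not part of the patch.

from pathlib import Path

# Illustrative sketch only -- mirrors the if/elif added to load_dataset().
def dataset_type_for(mode: str) -> str:
    if mode == 'S':
        return 'wide'
    elif mode == 'M':
        return 'wide_multivariate'
    raise ValueError(f"unsupported mode: {mode!r}")  # guard added here, not in the patch

storage_path = Path('custom_data')  # stands in for env.CUSTOM_DATA_PATH
print(storage_path / 'lsf' / dataset_type_for('M') / 'ETTh1')
# custom_data/lsf/wide_multivariate/ETTh1
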
diff --git a/src/uni2ts/model/multi_scale_moirai/finetune.py b/src/uni2ts/model/multi_scale_moirai/finetune.py
index 72662d3..24b23eb 100644
--- a/src/uni2ts/model/multi_scale_moirai/finetune.py
+++ b/src/uni2ts/model/multi_scale_moirai/finetune.py
@@ -132,15 +132,15 @@ def __init__(
         self.token_idx_per_scale = self._get_token_idx_per_scale()
 
     def post_init(self):
-        # for layer in self.module.encoder.layers:
-        #     # Check if the layer has an attribute named `self_attn` and if it is an instance of GroupedQueryAttention
-        #     if hasattr(layer, 'self_attn') and isinstance(layer.self_attn, GroupedQueryAttention):
-        #         # Call post_init() method of the GroupedQueryAttention object
-        #         layer.self_attn.init_multi_scale_modules(self.context_length, self.patch_size, self.num_new_scales, self.ds_factor)
+        for layer in self.module.encoder.layers:
+            # Check if the layer has an attribute named `self_attn` and if it is an instance of GroupedQueryAttention
+            if hasattr(layer, 'self_attn') and isinstance(layer.self_attn, GroupedQueryAttention):
+                # Call post_init() method of the GroupedQueryAttention object
+                layer.self_attn.init_multi_scale_modules(self.context_length, self.patch_size, self.num_new_scales, self.ds_factor)
 
-        for module in self.module.encoder.modules():
-            if isinstance(module, MultiScaleRotaryProjection):
-                module.post_init(self.token_idx_per_scale)
+        # for module in self.module.encoder.modules():
+        #     if isinstance(module, MultiScaleRotaryProjection):
+        #         module.post_init(self.token_idx_per_scale)
 
         # ToDo: Call post_init() method to replace BinaryAttentionBias with CrossVariateAttentionBias
         # What is the from_pretrained pipeline? Init first, then load, with parameters that fail to load silently ignored? If so, there is no need to add post_init.
@@ -324,6 +324,12 @@ def configure_optimizers(self) -> dict:
             if "film" in pn:
                 p.requires_grad = True
 
+            if "adapt_weight" in pn:
+                p.requires_grad = True
+
+            if "adapt_bias" in pn:
+                p.requires_grad = True
+
             if "var_attn_bias.emb" in pn:
                 p.requires_grad = True
@@ -417,6 +423,8 @@ def configure_optimizers(self) -> dict:
                 decay.add(fpn)
             elif pn.endswith("weight") and isinstance(m, blacklist_params):
                 no_decay.add(fpn)
+            elif "adapt_weight" in pn or "adapt_bias" in pn:
+                decay.add(fpn)
 
         # validate that we considered every parameter
         param_dict = {pn: p for pn, p in self.named_parameters() if p.requires_grad}
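post_init above calls init_multi_scale_modules(context_length, patch_size, num_new_scales, ds_factor) on every GroupedQueryAttention layer; the attention.py diff below then allocates, for each new scale, elementwise weight and bias tensors of shape (scale_len, dim), with scale_len shrinking by ds_factor per scale. A quick check of those lengths, as a sketch rather than part of the patch: the config values assume the etth1.sh settings (cl=5000, ps=64) and the eval default num_new_scales: 3, while ds_factor=2 is an assumption, since its value is not visible in this diff.

import math

# Sketch (not part of the patch): per-scale parameter lengths.
context_length, patch_size = 5000, 64   # from etth1.sh (cl, ps)
num_new_scales, ds_factor = 3, 2        # ds_factor=2 is assumed

base_len = math.ceil(context_length / patch_size)  # 79 context patches at the base scale
scale_len = math.ceil(base_len / ds_factor)
for scale in range(num_new_scales):
    # each of q/k/v gets a (scale_len, dim) weight of ones and a bias of zeros
    print(f"new scale {scale + 1}: scale_len = {scale_len}")  # 40, 20, 10
    scale_len = math.ceil(scale_len / ds_factor)
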
diff --git a/src/uni2ts/module/multi_scale/attention.py b/src/uni2ts/module/multi_scale/attention.py
index 7a4cd33..abb4009 100644
--- a/src/uni2ts/module/multi_scale/attention.py
+++ b/src/uni2ts/module/multi_scale/attention.py
@@ -100,6 +100,41 @@ def __init__(
         self.dim = dim
 
         self.num_new_scales = None
+
+    def init_multi_scale_modules(self, context_length, patch_size, num_new_scales, ds_factor):
+        self.num_new_scales = num_new_scales
+
+        base_len = math.ceil(context_length / patch_size)  # num context patches in base scale
+        scale_len = math.ceil(base_len / ds_factor)
+
+        # Initialize parameter lists
+        self.query_adapt_weight = nn.ParameterList()
+        self.key_adapt_weight = nn.ParameterList()
+        self.value_adapt_weight = nn.ParameterList()
+        self.query_adapt_bias = nn.ParameterList()
+        self.key_adapt_bias = nn.ParameterList()
+        self.value_adapt_bias = nn.ParameterList()
+
+        for _ in range(num_new_scales):
+            # Append the new parameters for the current scale
+            self.query_adapt_weight.append(
+                nn.Parameter(torch.ones((scale_len, self.dim), dtype=torch.float), requires_grad=True))
+            self.key_adapt_weight.append(
+                nn.Parameter(torch.ones((scale_len, self.dim), dtype=torch.float), requires_grad=True))
+            self.value_adapt_weight.append(
+                nn.Parameter(torch.ones((scale_len, self.dim), dtype=torch.float), requires_grad=True))
+
+            self.query_adapt_bias.append(
+                nn.Parameter(torch.zeros((scale_len, self.dim), dtype=torch.float), requires_grad=True))
+            self.key_adapt_bias.append(
+                nn.Parameter(torch.zeros((scale_len, self.dim), dtype=torch.float), requires_grad=True))
+            self.value_adapt_bias.append(
+                nn.Parameter(torch.zeros((scale_len, self.dim), dtype=torch.float), requires_grad=True))
+
+            # Update scale length for the next iteration
+            scale_len = math.ceil(scale_len / ds_factor)
+
 
 # def init_multi_scale_modules(self, context_length, patch_size, num_new_scales, ds_factor):
 #
 #     self.num_new_scales = num_new_scales
@@ -115,28 +150,24 @@ def __init__(
 #         nn.Linear(in_features=nh, out_features=self.dim) for _ in range(num_new_scales)
 #     ])
 
-    def init_multi_scale_modules(self, context_length, patch_size, num_new_scales, ds_factor):
-
-        self.num_new_scales = num_new_scales
-
-        base_len = math.ceil(context_length / patch_size)  # num context patches in base scale
-
-        scale_len = math.ceil(base_len / ds_factor)
-        self.query_film_generator = nn.ModuleList([
-            nn.Linear(in_features=self.dim, out_features=2 * scale_len)
-        ])
-        self.key_film_generator = nn.ModuleList([
-            nn.Linear(in_features=self.dim, out_features=2 * scale_len)
-        ])
-
-        for _ in range(1, num_new_scales):
-            scale_len = math.ceil(scale_len / ds_factor)
-            self.query_film_generator.append(
-                nn.Linear(in_features=self.dim, out_features=2 * scale_len)
-            )
-            self.key_film_generator.append(
-                nn.Linear(in_features=self.dim, out_features=2 * scale_len)
-            )
+    # def init_multi_scale_modules(self, context_length, patch_size, num_new_scales, ds_factor):
+    #
+    #     self.num_new_scales = num_new_scales
+    #
+    #     base_len = math.ceil(context_length / patch_size)  # num context patches in base scale
+    #     scale_len = math.ceil(base_len / ds_factor)
+    #
+    #     self.query_film_generator = nn.ModuleList()
+    #     self.key_film_generator = nn.ModuleList()
+    #
+    #     for _ in range(num_new_scales):
+    #         self.query_film_generator.append(
+    #             nn.Linear(in_features=self.dim, out_features=2 * scale_len)
+    #         )
+    #         self.key_film_generator.append(
+    #             nn.Linear(in_features=self.dim, out_features=2 * scale_len)
+    #         )
+    #         scale_len = math.ceil(scale_len / ds_factor)
 
     def _get_var_id(
         self,
@@ -301,28 +332,56 @@ def forward(
         query_time_id: Optional[Int[torch.Tensor, "*batch q_len"]] = None,
         kv_time_id: Optional[Int[torch.Tensor, "*batch kv_len"]] = None,
     ) -> Float[torch.Tensor, "*batch q_len dim"]:
-        query = self.q_proj(query)
-        key = self.k_proj(key)
-        value = self.v_proj(value)
+        # query = self.q_proj(query)
+        # key = self.k_proj(key)
+        # value = self.v_proj(value)
+
+        init_query = self.q_proj(query)
+        init_key = self.k_proj(key)
+        init_value = self.v_proj(value)
+
+        query = init_query.clone()
+        key = init_key.clone()
+        value = init_value.clone()
+
+        # ToDo: Plan B: Directly apply different Film on query / key to different scales. w/o revising RoPE
+        if self.num_new_scales is not None:
+            index_by_variate = self.get_token_index_by_variate(query_var_id)
+
+            for scale in range(self.num_new_scales):
+                assert torch.equal(query_var_id, kv_var_id), "query_var_id is different from kv_var_id"
+                index = index_by_variate[scale + 1]
+                query_scale = init_query[..., index, :]  # (bs, num_patch_new_scale, dim)
+                query[..., index, :] = self.query_adapt_weight[scale] * query_scale + self.query_adapt_bias[scale]
+
+                key_scale = init_key[..., index, :]  # (bs, num_patch_new_scale, dim)
+                key[..., index, :] = self.key_adapt_weight[scale] * key_scale + self.key_adapt_bias[scale]
+
+                value_scale = init_value[..., index, :]  # (bs, num_patch_new_scale, dim)
+                value[..., index, :] = self.value_adapt_weight[scale] * value_scale + self.value_adapt_bias[scale]
 
-        # # ToDo: Plan B: Directly apply different Film on query / key to different scales. w/o revising RoPE
+
+
+        # # Apply a different transformation for each dimension. All tokens share the same transformation.
         # if self.num_new_scales is not None:
         #     index_by_variate = self.get_token_index_by_variate(query_var_id)
-        #
+
        #     for scale in range(self.num_new_scales):
         #         assert torch.equal(query_var_id, kv_var_id), "query_var_id is different from kv_var_id"
         #         index = index_by_variate[scale + 1]
-        #
+
         #         query_scale = query[..., index, :]  # (bs, num_patch_new_scale, dim)
         #         query_scale_reprs = self.film_controller(torch.mean(query_scale, dim=1))
-        #         query_weight = self.query_film_generator[scale](query_scale_reprs)
-        #         query[..., index, :] = query_weight.unsqueeze(-2) * query_scale
-        #
+        #         query_adapt_weight = self.query_film_generator[scale](query_scale_reprs)  # (bs, dim)
+        #         query[..., index, :] = query_adapt_weight.unsqueeze(-2) * query_scale
+
         #         key_scale = key[..., index, :]
         #         key_scale_reprs = self.film_controller(torch.mean(key_scale, dim=1))
-        #         key_weight = self.key_film_generator[scale](key_scale_reprs)
-        #         key[..., index, :] = key_weight.unsqueeze(-2) * key_scale
+        #         key_adapt_weight = self.key_film_generator[scale](key_scale_reprs)
+        #         key[..., index, :] = key_adapt_weight.unsqueeze(-2) * key_scale
+
+        # # Apply a different transformation for each token. All dimensions of a token share the same transformation.
        # if self.num_new_scales is not None:
        #     index_by_variate = self.get_token_index_by_variate(query_var_id)
        #
@@ -331,15 +390,17 @@ def forward(
        #         index = index_by_variate[scale+1]
        #         query_scale = query[..., index, :]  # (bs, num_patch_new_scale, dim)
        #         query_film_out = self.query_film_generator[scale](torch.mean(query_scale, dim=1))  # ToDo: try flatten instead?
-        #         query_weight, query_bias = query_film_out[:, :int(query_film_out.size(-1) / 2)], query_film_out[:, int(query_film_out.size(-1) / 2):]
-        #         query[..., index, :] = query_weight.unsqueeze(-1) * query_scale + query_bias.unsqueeze(-1)
+        #         query_adapt_weight, query_adapt_bias = query_film_out[:, :int(query_film_out.size(-1) / 2)], query_film_out[:, int(query_film_out.size(-1) / 2):]
+        #         query[..., index, :] = query_adapt_weight.unsqueeze(-1) * query_scale + query_adapt_bias.unsqueeze(-1)
        #
        #         key_scale = key[..., index, :]
        #         key_film_out = self.key_film_generator[scale](torch.mean(key_scale, dim=1))
-        #         key_weight, key_bias = key_film_out[:, :int(key_film_out.size(-1) / 2)], key_film_out[:,
-        #                                int(key_film_out.size(
-        #                                    -1) / 2):]
-        #         key[..., index, :] = key_weight.unsqueeze(-1) * key_scale + key_bias.unsqueeze(-1)
+        #         key_adapt_weight, key_adapt_bias = key_film_out[:, :int(key_film_out.size(-1) / 2)], key_film_out[:,
+        #                                            int(key_film_out.size(
+        #                                                -1) / 2):]
+        #         key[..., index, :] = key_adapt_weight.unsqueeze(-1) * key_scale + key_adapt_bias.unsqueeze(-1)
+
+
        query = self.q_norm(
            rearrange(