From 92464c7f248a3419acb56e8a44333edb4fc8d120 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Sun, 18 Dec 2022 20:56:33 +0800 Subject: [PATCH 01/10] wandb add_corpus and resume_training modify --- textbox/config/configurator.py | 6 +++++- textbox/quick_start/experiment.py | 3 ++- textbox/trainer/trainer.py | 11 ++++------- textbox/utils/argument_list.py | 3 ++- textbox/utils/dashboard.py | 8 -------- 5 files changed, 13 insertions(+), 18 deletions(-) diff --git a/textbox/config/configurator.py b/textbox/config/configurator.py index 0d1fa87e..21dd029b 100755 --- a/textbox/config/configurator.py +++ b/textbox/config/configurator.py @@ -262,6 +262,7 @@ def _set_default_parameters(self): self.setdefault('valid_strategy', 'epoch') self.setdefault('valid_steps', 1) self.setdefault('disable_tqdm', False) + self.setdefault('resume_training',True) self._simplify_parameter('optimizer') self._simplify_parameter('scheduler') self._simplify_parameter('src_lang') @@ -304,7 +305,10 @@ def check_load_type(self): if not self.final_config_dict.get('model_path', None): self.final_config_dict['load_type'] = 'from_scratch' elif os.path.exists(os.path.join(self.final_config_dict['model_path'], 'textbox_configuration.pt')): - self.final_config_dict['load_type'] = 'resume' + if self.final_config_dict.get('resume_all'): + self.final_config_dict['load_type'] = 'resume ' + else: + self.final_config_dict['load_type'] = 'resume' else: self.final_config_dict['load_type'] = 'from_pretrained' diff --git a/textbox/quick_start/experiment.py b/textbox/quick_start/experiment.py index 9c3314f0..00afd5e8 100644 --- a/textbox/quick_start/experiment.py +++ b/textbox/quick_start/experiment.py @@ -94,7 +94,8 @@ def _on_experiment_start(self, extended_config: Optional[dict]): self.valid_result: Optional[ResultType] = None self.test_result: Optional[ResultType] = None if config['load_type'] == 'resume': - self.trainer.resume_checkpoint(config['model_path']) + if config['resume_training']: + self.trainer.resume_checkpoint(config['model_path']) self.model.from_pretrained(config['model_path']) def _do_train_and_valid(self): diff --git a/textbox/trainer/trainer.py b/textbox/trainer/trainer.py index 241e1a6c..94e08a10 100755 --- a/textbox/trainer/trainer.py +++ b/textbox/trainer/trainer.py @@ -364,18 +364,15 @@ def save_checkpoint(self): def save_generated_text(self, generated_corpus: List[str], is_valid: bool = False): r"""Store the generated text by our model into `self.saved_text_filename`.""" saved_text_filename = self.saved_text_filename - if not is_valid: - self._summary_tracker.add_corpus('test', generated_corpus) - else: - path_to_save = self.saved_model_filename + '_epoch-' + str(self.timestamp.valid_epoch) - saved_text_filename = os.path.join(path_to_save, 'generation.txt') - os.makedirs(path_to_save, exist_ok=True) + path_to_save = self.saved_model_filename + '_epoch-' + str(self.timestamp.valid_epoch) + saved_text_filename = os.path.join(path_to_save, 'generation.txt') + os.makedirs(path_to_save, exist_ok=True) with open(saved_text_filename, 'w') as fout: for text in generated_corpus: fout.write(text + '\n') def resume_checkpoint(self, resume_dir: str): - r"""Load the model parameters information and training information. + r"""Load training information. Args: resume_dir: the checkpoint file (specific by `model_path`). 
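The hunks above change what "resume" means: `resume_checkpoint` now restores only training state (optimizer, scheduler, and epoch counters), and it is invoked only when the new `resume_training` option (default `True`) is set, while model weights are still loaded through `from_pretrained` in either case. A minimal sketch of the intended invocation, reusing the checkpoint path from the documentation added later in this series (the directory name is illustrative):

```bash
# Model and tokenizer are always loaded from model_path; trainer state
# (optimizer, scheduler, epoch counters) is restored only while
# resume_training keeps its default value of True.
python run_textbox.py --model=BART --dataset=samsum \
    --model_path=saved/BART-samsum-2022-Dec-18_20-57-47/checkpoint_best \
    --resume_training=True
```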
diff --git a/textbox/utils/argument_list.py b/textbox/utils/argument_list.py index 95c5218d..bd18fdb6 100644 --- a/textbox/utils/argument_list.py +++ b/textbox/utils/argument_list.py @@ -43,7 +43,8 @@ 'weight_decay', # common parameters 'accumulation_steps', # accelerator 'disable_tqdm', # tqdm - 'pretrain_task' # pretraining + 'pretrain_task', # pretraining + 'resume_training' ] evaluation_parameters = [ diff --git a/textbox/utils/dashboard.py b/textbox/utils/dashboard.py index 88e37ee6..3c43d93e 100644 --- a/textbox/utils/dashboard.py +++ b/textbox/utils/dashboard.py @@ -435,14 +435,6 @@ def add_scalar(self, tag: str, scalar_value: Union[float, int]): if self._is_local_main_process and not self.tracker_finished and self.axes is not None: wandb.log(info, step=self.axes.train_step, commit=False) - def add_corpus(self, tag: str, corpus: Iterable[str]): - r"""Add a corpus to summary.""" - if tag.startswith('valid'): - self._current_epoch._update_metrics({'generated_corpus': '\n'.join(corpus)}) - if self._is_local_main_process and not self.tracker_finished: - _corpus = wandb.Table(columns=[tag], data=pd.DataFrame(corpus)) - wandb.log({tag: _corpus}, step=self.axes.train_step) - root = None From dbfabb2cec834f592a46e916de77feedd44299ef Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Mon, 26 Dec 2022 17:46:34 +0800 Subject: [PATCH 02/10] wandb modify --- install.sh | 2 +- textbox/config/configurator.py | 1 + textbox/properties/overall.yaml | 1 + textbox/quick_start/experiment.py | 2 ++ textbox/utils/argument_list.py | 1 + 5 files changed, 6 insertions(+), 1 deletion(-) diff --git a/install.sh b/install.sh index 069500ea..82ff2fbf 100644 --- a/install.sh +++ b/install.sh @@ -35,7 +35,7 @@ esac echo "Installation may take a few minutes." 
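# Note: the pinned builds below (torch 1.12.1 against the CUDA 11.3 toolkit)
# assume a CUDA 11-capable NVIDIA driver; if your driver differs, substitute
# the matching cudatoolkit build when running this installer.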
echo -e "\033[0;32mInstalling torch ...\033[0m" -conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch +conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch echo -e "\033[0;32mInstalling requirements ...\033[0m" pip install -r requirements.txt diff --git a/textbox/config/configurator.py b/textbox/config/configurator.py index 21dd029b..e72be40e 100755 --- a/textbox/config/configurator.py +++ b/textbox/config/configurator.py @@ -263,6 +263,7 @@ def _set_default_parameters(self): self.setdefault('valid_steps', 1) self.setdefault('disable_tqdm', False) self.setdefault('resume_training',True) + self.setdefault('wandb', 'online') self._simplify_parameter('optimizer') self._simplify_parameter('scheduler') self._simplify_parameter('src_lang') diff --git a/textbox/properties/overall.yaml b/textbox/properties/overall.yaml index 684dd22b..0fa0d6be 100644 --- a/textbox/properties/overall.yaml +++ b/textbox/properties/overall.yaml @@ -5,6 +5,7 @@ seed: 2020 state: INFO reproducibility: True data_path: 'dataset/' +wandb: 'online' # training settings epochs: 50 diff --git a/textbox/quick_start/experiment.py b/textbox/quick_start/experiment.py index 00afd5e8..6e58bbe8 100644 --- a/textbox/quick_start/experiment.py +++ b/textbox/quick_start/experiment.py @@ -37,6 +37,8 @@ def __init__( config_dict: Optional[Dict[str, Any]] = None, ): self.config = Config(model, dataset, config_file_list, config_dict) + wandb_setting = 'wandb ' + self.config['wandb'] + os.system(wandb_setting) self.__extended_config = None self.accelerator = Accelerator(gradient_accumulation_steps=self.config['accumulation_steps']) diff --git a/textbox/utils/argument_list.py b/textbox/utils/argument_list.py index bd18fdb6..11f1b5ac 100644 --- a/textbox/utils/argument_list.py +++ b/textbox/utils/argument_list.py @@ -21,6 +21,7 @@ '_hyper_tuning', # hyper tuning 'multi_seed', # multiple random seed 'romanian_postprocessing', + 'wandb' ] training_parameters = [ From d07991efb1ee2ea0b3952ba3285b38f7cbe30949 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Mon, 26 Dec 2022 20:10:51 +0800 Subject: [PATCH 03/10] modify install.sh --- install.sh | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/install.sh b/install.sh index 069500ea..68d7b24c 100644 --- a/install.sh +++ b/install.sh @@ -35,7 +35,7 @@ esac echo "Installation may take a few minutes." echo -e "\033[0;32mInstalling torch ...\033[0m" -conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch +conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch echo -e "\033[0;32mInstalling requirements ...\033[0m" pip install -r requirements.txt @@ -75,16 +75,6 @@ chmod +rx $F2RExpDIR/WordNet-2.0.exc.db pip uninstall py-rouge pip install rouge > /dev/null -echo -e "\033[0;32mInstalling requirements (libxml) ...\033[0m" -if [[ "$OSTYPE" == "darwin"* ]]; then - brewinstall libxml2 cpanminus - cpanm --force XML::Parser -else - if [ -x "$(command -v apt-get)" ]; then sudo apt-get install libxml-parser-perl - elif [ -x "$(command -v yum)" ]; then sudo yum install -y "perl(XML::LibXML)" - else echo -e '\033[0;31mFailed to install libxml. 
See https://github.com/pltrdy/files2rouge/issues/9 for more information.\033[0m' && exit; - fi -fi echo -e "\033[0;32mInstalling requirements (transformers) ...\033[0m" git clone https://github.com/RUCAIBox/transformers.git From 02bd80c1d36e2fa4c607fa17d0b616292dfb4861 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 19:46:09 +0800 Subject: [PATCH 04/10] modify basic_training.md --- asset/basic_training.md | 123 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/asset/basic_training.md b/asset/basic_training.md index e69de29b..697111cf 100644 --- a/asset/basic_training.md +++ b/asset/basic_training.md @@ -0,0 +1,123 @@ +# Basic Training + + +## config +You may want to load your own configurations in different ways: +* cmd +* config files +* yaml + +These ways are equivalent. However, we recommend using **cmd** first, followed by **config files**. + + + +### cmd +You may want to change configurations in the command line like ``--xx=yy``. ``xx`` is the name of the parameters and ``yy`` is the corresponding value. for example: + +```bash +python run_textbox.py --model=BART --model_path=facebook/bart-base --epochs=1 +``` + +It's suitable for **a few temporary** modifications with cmd like: +* ``model`` +* ``model_path`` +* ``dataset`` +* ``epochs`` +* ... + +### config files + +You can also modify configurations in the local files: +```bash +python run_textbox.py ... --config_files +``` + +Every config file is an additional yaml files like: + +```yaml +efficient_methods: ['prompt-tuning'] +``` +It's suitable for **a large number of** modifications or **long-term** modification with cmd like: +* ``efficient_methods`` +* ``efficient_kwargs`` +* ... + +### yaml + +The original configurations are in the yaml files. You can check the values there, but it's not recommended to modify the files except for **permanently** modification the dataset. These files are in the path ``textbox\properties``: +* ``overall.yaml`` +* ``dataset\*.yaml`` +* ``model\*yaml`` + + +## trainer + +You can choose optimizer and scheduler through `optimizer=` and `scheduler=`. We provide a wrapper around **pytorch optimizer**, which means parameters like `epsilon` or `warmup_steps` can be specified with keyword dictionaries `optimizer_kwargs={'epsilon': ... }` and `scheduler_kwargs={'warmup_steps': ... }`. See [pytorch optimizer](https://pytorch.org/docs/stable/optim.html#algorithms) and [scheduler]() for a complete tutorial. + +Validation frequency is introduced to validate the model **at each specific batch-steps or epochs**. Specify `valid_strategy` (either `'step'` or `'epoch'`) and `valid_steps=` to adjust the pace. Specifically, traditional train-validate paradigm is a special case with `valid_strategy=epoch` and `valid_steps=1`. + +`max_save=` indicates **the maximal amount of saved files** (checkpoint and generated corpus during evaluation). `-1`: save every file, `0`: do not save any file, `1`: only save the file with best score, and `n`: save both the best and the last $n−1$ files. + +Evaluation metrics can be specified with `metrics` ([full list](evaluation.md)), and produce a dictionaries of results: + +```bash +python run_textbox.py ... --metrics=\[\'rouge\'\] +# results: { 'rouge-1': xxx, 'rouge-2': xxx, 'rouge-l': xxx, 'rouge-w': xxx, ... 
} +``` + +**Early stopping** can be configured with `metrics_for_best_model=`, which is used to calculate score, and `stopping_steps=`, which specifies the amount of validation steps: + +```bash +python run_textbox.py ... --stopping_steps=8 --metrics_for_best_model=\[\'rouge-1\', \'rouge-w\'\] +``` + +or yaml equivalent: + +```yaml +stopping_steps: 8 +metrics_for_best_model: ['rouge-1', 'rouge-w'] +``` + +Other commonly used parameters includes `epochs=` and `max_steps=` (indicating maximum iteration of epochs and batch steps, if you set `max_steps`, `epochs` will be invalid), `learning_rate=`, `train_batch_size=`, `weight_decay=`, and `grad_clip=`. + + + +### Partial Experiment + +You can run partial experiment with `do_train`, `do_valid`, `do_test`. You can test your pipeline and debug with `quick_test=` to load just a few examples. + +The following script loads the trained model from path `example` and conducts generation and evaluation without training and evaluation. +```bash +python run_textbox.py ... --do_train=False --do_valid=False \\ +--model_path=example --quick_test=16 +``` + + + + + + + + +## wandb + +If you are running your code in jupyter environments, you may want to login by simply setting an environment variable (your key may be stored in plain text): + +```python +%env WANDB_API_KEY= +``` +Here you can set wandb with `wandb`. + +If you are debugging your model, you may want to **disable W&B** with `--wandb=disabled` and **none of the metrics** will be recorded.You can also disable **sync only** with `--wandb=offline` and enable it again with `--wandb=online` to upload to the cloud. Meanwhile, the parameter can be configured in the yaml file like: + +```yaml +wandb: online +``` + + +The local files can be uploaded by executing `wandb sync` in the command line. + +After configuration, you can throttle wandb prompts by defining environment variable `export WANDB_SILENT=false`. For more information, see [documentation](docs.wandb.ai). + + + From 51b66af17a0aca2968980dd88b0a058d81b96bd7 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 19:48:07 +0800 Subject: [PATCH 05/10] modify basic_training.md --- asset/basic_training.md | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/asset/basic_training.md b/asset/basic_training.md index 697111cf..64eccff9 100644 --- a/asset/basic_training.md +++ b/asset/basic_training.md @@ -1,6 +1,4 @@ # Basic Training - - ## config You may want to load your own configurations in different ways: * cmd @@ -80,8 +78,6 @@ metrics_for_best_model: ['rouge-1', 'rouge-w'] Other commonly used parameters includes `epochs=` and `max_steps=` (indicating maximum iteration of epochs and batch steps, if you set `max_steps`, `epochs` will be invalid), `learning_rate=`, `train_batch_size=`, `weight_decay=`, and `grad_clip=`. - - ### Partial Experiment You can run partial experiment with `do_train`, `do_valid`, `do_test`. You can test your pipeline and debug with `quick_test=` to load just a few examples. @@ -92,13 +88,6 @@ python run_textbox.py ... 
--do_train=False --do_valid=False \\ --model_path=example --quick_test=16 ``` - - - - - - - ## wandb If you are running your code in jupyter environments, you may want to login by simply setting an environment variable (your key may be stored in plain text): @@ -114,10 +103,6 @@ If you are debugging your model, you may want to **disable W&B** with `--wandb=d wandb: online ``` - The local files can be uploaded by executing `wandb sync` in the command line. -After configuration, you can throttle wandb prompts by defining environment variable `export WANDB_SILENT=false`. For more information, see [documentation](docs.wandb.ai). - - - +After configuration, you can throttle wandb prompts by defining environment variable `export WANDB_SILENT=false`. For more information, see [documentation](docs.wandb.ai). \ No newline at end of file From 74ef83808a4635865cd9750e2c2979eb7866b26c Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 19:51:26 +0800 Subject: [PATCH 06/10] configurator fixed --- textbox/config/configurator.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/textbox/config/configurator.py b/textbox/config/configurator.py index e72be40e..839939a2 100755 --- a/textbox/config/configurator.py +++ b/textbox/config/configurator.py @@ -306,9 +306,6 @@ def check_load_type(self): if not self.final_config_dict.get('model_path', None): self.final_config_dict['load_type'] = 'from_scratch' elif os.path.exists(os.path.join(self.final_config_dict['model_path'], 'textbox_configuration.pt')): - if self.final_config_dict.get('resume_all'): - self.final_config_dict['load_type'] = 'resume ' - else: self.final_config_dict['load_type'] = 'resume' else: self.final_config_dict['load_type'] = 'from_pretrained' From 948fcde5d0a692afb252536410b80021d78278d9 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 20:06:31 +0800 Subject: [PATCH 07/10] basic_training modification and upload RNN.md --- asset/basic_training.md | 6 +----- instructions/RNN.md | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/asset/basic_training.md b/asset/basic_training.md index 64eccff9..aa919dd6 100644 --- a/asset/basic_training.md +++ b/asset/basic_training.md @@ -1,14 +1,10 @@ # Basic Training ## config -You may want to load your own configurations in different ways: +You may want to load your own configurations in equivalent ways: * cmd * config files * yaml -These ways are equivalent. However, we recommend using **cmd** first, followed by **config files**. - - - ### cmd You may want to change configurations in the command line like ``--xx=yy``. ``xx`` is the name of the parameters and ``yy`` is the corresponding value. for example: diff --git a/instructions/RNN.md b/instructions/RNN.md index e69de29b..eac13915 100644 --- a/instructions/RNN.md +++ b/instructions/RNN.md @@ -0,0 +1,16 @@ +## RNN + +You can train a RNN encoder-decoder with attention from scratch with this model. Three models are available: +* RNN +* GRU +* LSTM + +You can choose them through ``model=RNN``,``model=GRU``,``model=LSTM``. 
Meanwhile, you can check or modify the default parameters of the model in ``textbox/property/model/rnn.yaml(gru.yaml)(lstm.yaml)`` + +Example usage: + +```bash +python run_textbox.py \ + --model=RNN \ + --dataset=samsum +``` \ No newline at end of file From b60633f1e6971343c18fd8abc32501e612baa458 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 21:35:22 +0800 Subject: [PATCH 08/10] basic_training modify --- asset/basic_training.md | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/asset/basic_training.md b/asset/basic_training.md index aa919dd6..9f9566c6 100644 --- a/asset/basic_training.md +++ b/asset/basic_training.md @@ -21,7 +21,7 @@ It's suitable for **a few temporary** modifications with cmd like: ### config files -You can also modify configurations in the local files: +You can also modify configurations through the local files: ```bash python run_textbox.py ... --config_files ``` @@ -46,31 +46,22 @@ The original configurations are in the yaml files. You can check the values ther ## trainer -You can choose optimizer and scheduler through `optimizer=` and `scheduler=`. We provide a wrapper around **pytorch optimizer**, which means parameters like `epsilon` or `warmup_steps` can be specified with keyword dictionaries `optimizer_kwargs={'epsilon': ... }` and `scheduler_kwargs={'warmup_steps': ... }`. See [pytorch optimizer](https://pytorch.org/docs/stable/optim.html#algorithms) and [scheduler]() for a complete tutorial. +You can choose optimizer and scheduler through `optimizer=` and `scheduler=`. We provide a wrapper around **pytorch optimizer**, which means parameters like `epsilon` or `warmup_steps` can be specified with keyword dictionaries `optimizer_kwargs={'epsilon': ... }` and `scheduler_kwargs={'warmup_steps': ... }`. See [pytorch optimizer](https://pytorch.org/docs/stable/optim.html#algorithms) and scheduler for a complete tutorial. Validation frequency is introduced to validate the model **at each specific batch-steps or epochs**. Specify `valid_strategy` (either `'step'` or `'epoch'`) and `valid_steps=` to adjust the pace. Specifically, traditional train-validate paradigm is a special case with `valid_strategy=epoch` and `valid_steps=1`. `max_save=` indicates **the maximal amount of saved files** (checkpoint and generated corpus during evaluation). `-1`: save every file, `0`: do not save any file, `1`: only save the file with best score, and `n`: save both the best and the last $n−1$ files. -Evaluation metrics can be specified with `metrics` ([full list](evaluation.md)), and produce a dictionaries of results: +According to ``metrics_for_best_model``, thr score of current checkpoint will be calculated, and evaluatin metrics specified with ``metrics``([full list](evaluation.md)) will be chosen. **Early stopping** can be configured with `stopping_steps=` and score of every checkpoint. -```bash -python run_textbox.py ... --metrics=\[\'rouge\'\] -# results: { 'rouge-1': xxx, 'rouge-2': xxx, 'rouge-l': xxx, 'rouge-w': xxx, ... } -``` - -**Early stopping** can be configured with `metrics_for_best_model=`, which is used to calculate score, and `stopping_steps=`, which specifies the amount of validation steps: ```bash -python run_textbox.py ... --stopping_steps=8 --metrics_for_best_model=\[\'rouge-1\', \'rouge-w\'\] +python run_textbox.py ... 
--stopping_steps=8 \\ + --metrics_for_best_model=\[\'rouge-1\', \'rouge-w\'\] \\ + --metrics=\[\'rouge\'\] ``` -or yaml equivalent: - -```yaml -stopping_steps: 8 -metrics_for_best_model: ['rouge-1', 'rouge-w'] -``` +You can resume from a **previous checkpoint** through ``model_path=``.When you want to restrore **all trainer parameters** like optimizer and start_epoch, you can set ``resume_training=True``. Otherwise, only **model and tokenizer** will be loaded. Other commonly used parameters includes `epochs=` and `max_steps=` (indicating maximum iteration of epochs and batch steps, if you set `max_steps`, `epochs` will be invalid), `learning_rate=`, `train_batch_size=`, `weight_decay=`, and `grad_clip=`. From 6c1e961d52913072997266e89ca4a5f4cb1769ae Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 22:00:47 +0800 Subject: [PATCH 09/10] configurator delete space --- textbox/config/configurator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/textbox/config/configurator.py b/textbox/config/configurator.py index 839939a2..800c67dc 100755 --- a/textbox/config/configurator.py +++ b/textbox/config/configurator.py @@ -306,7 +306,7 @@ def check_load_type(self): if not self.final_config_dict.get('model_path', None): self.final_config_dict['load_type'] = 'from_scratch' elif os.path.exists(os.path.join(self.final_config_dict['model_path'], 'textbox_configuration.pt')): - self.final_config_dict['load_type'] = 'resume' + self.final_config_dict['load_type'] = 'resume' else: self.final_config_dict['load_type'] = 'from_pretrained' From 8cdeec0df1bf73dd58faa293a332cef0f6a26461 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 22:33:18 +0800 Subject: [PATCH 10/10] add some examples and check the spelling --- asset/basic_training.md | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/asset/basic_training.md b/asset/basic_training.md index 9f9566c6..b9f25353 100644 --- a/asset/basic_training.md +++ b/asset/basic_training.md @@ -1,6 +1,6 @@ # Basic Training ## config -You may want to load your own configurations in equivalent ways: +You may want to load your configurations in equivalent ways: * cmd * config files * yaml @@ -26,19 +26,19 @@ You can also modify configurations through the local files: python run_textbox.py ... --config_files ``` -Every config file is an additional yaml files like: +Every config file is an additional yaml file like: ```yaml efficient_methods: ['prompt-tuning'] ``` -It's suitable for **a large number of** modifications or **long-term** modification with cmd like: +It's suitable for **a large number of** modifications or **long-term** modifications with cmd like: * ``efficient_methods`` * ``efficient_kwargs`` * ... ### yaml -The original configurations are in the yaml files. You can check the values there, but it's not recommended to modify the files except for **permanently** modification the dataset. These files are in the path ``textbox\properties``: +The original configurations are in the yaml files. You can check the values there, but it's not recommended to modify the files except for **permanent** modification of the dataset. These files are in the path ``textbox\properties``: * ``overall.yaml`` * ``dataset\*.yaml`` * ``model\*yaml`` @@ -46,13 +46,13 @@ The original configurations are in the yaml files. You can check the values ther ## trainer -You can choose optimizer and scheduler through `optimizer=` and `scheduler=`. 
We provide a wrapper around **pytorch optimizer**, which means parameters like `epsilon` or `warmup_steps` can be specified with keyword dictionaries `optimizer_kwargs={'epsilon': ... }` and `scheduler_kwargs={'warmup_steps': ... }`. See [pytorch optimizer](https://pytorch.org/docs/stable/optim.html#algorithms) and scheduler for a complete tutorial. +You can choose an optimizer and scheduler through `optimizer=` and `scheduler=`. We provide a wrapper around **pytorch optimizer**, which means parameters like `epsilon` or `warmup_steps` can be specified with keyword dictionaries `optimizer_kwargs={'epsilon': ... }` and `scheduler_kwargs={'warmup_steps': ... }`. See [pytorch optimizer](https://pytorch.org/docs/stable/optim.html#algorithms) and scheduler for a complete tutorial. -Validation frequency is introduced to validate the model **at each specific batch-steps or epochs**. Specify `valid_strategy` (either `'step'` or `'epoch'`) and `valid_steps=` to adjust the pace. Specifically, traditional train-validate paradigm is a special case with `valid_strategy=epoch` and `valid_steps=1`. +Validation frequency is introduced to validate the model **at each specific batch-steps or epoch**. Specify `valid_strategy` (either `'step'` or `'epoch'`) and `valid_steps=` to adjust the pace. Specifically, the traditional train-validate paradigm is a special case with `valid_strategy=epoch` and `valid_steps=1`. -`max_save=` indicates **the maximal amount of saved files** (checkpoint and generated corpus during evaluation). `-1`: save every file, `0`: do not save any file, `1`: only save the file with best score, and `n`: save both the best and the last $n−1$ files. +`max_save=` indicates **the maximal amount of saved files** (checkpoint and generated corpus during evaluation). `-1`: save every file, `0`: do not save any file, `1`: only save the file with the best score, and `n`: save both the best and the last $n−1$ files. -According to ``metrics_for_best_model``, thr score of current checkpoint will be calculated, and evaluatin metrics specified with ``metrics``([full list](evaluation.md)) will be chosen. **Early stopping** can be configured with `stopping_steps=` and score of every checkpoint. +According to ``metrics_for_best_model``, the score of the current checkpoint will be calculated, and evaluation metrics specified with ``metrics``([full list](evaluation.md)) will be chosen. **Early stopping** can be configured with `stopping_steps=` and score of every checkpoint. ```bash @@ -61,30 +61,35 @@ python run_textbox.py ... --stopping_steps=8 \\ --metrics=\[\'rouge\'\] ``` -You can resume from a **previous checkpoint** through ``model_path=``.When you want to restrore **all trainer parameters** like optimizer and start_epoch, you can set ``resume_training=True``. Otherwise, only **model and tokenizer** will be loaded. +You can resume from a **previous checkpoint** through ``model_path=``.When you want to restore **all trainer parameters** like optimizer and start_epoch, you can set ``resume_training=True``. Otherwise, only **model and tokenizer** will be loaded. The script below will resume training from checkpoint in the path ``saved/BART-samsum-2022-Dec-18_20-57-47/checkpoint_best`` -Other commonly used parameters includes `epochs=` and `max_steps=` (indicating maximum iteration of epochs and batch steps, if you set `max_steps`, `epochs` will be invalid), `learning_rate=`, `train_batch_size=`, `weight_decay=`, and `grad_clip=`. 
+```bash +python run_textbox --model_path=saved/BART-samsum-2022-Dec-18_20-57-47/checkpoint_best \\ +--resume_training=True +``` + +Other commonly used parameters include `epochs=` and `max_steps=` (indicating maximum iteration of epochs and batch steps, if you set `max_steps`, `epochs` will be invalid), `learning_rate=`, `train_batch_size=`, `weight_decay=`, and `grad_clip=`. ### Partial Experiment -You can run partial experiment with `do_train`, `do_valid`, `do_test`. You can test your pipeline and debug with `quick_test=` to load just a few examples. +You can run the partial experiment with `do_train`, `do_valid`and `do_test`. You can test your pipeline and debug with `quick_test=` to load just a few examples. -The following script loads the trained model from path `example` and conducts generation and evaluation without training and evaluation. +The following script loads the trained model from a local path and conducts generation and evaluation without training and evaluation. ```bash -python run_textbox.py ... --do_train=False --do_valid=False \\ ---model_path=example --quick_test=16 +python run_textbox.py --model_path=saved/BART-samsum-2022-Dec-18_20-57-47/checkpoint_best \\ +--do_train=False --do_valid=False ``` ## wandb -If you are running your code in jupyter environments, you may want to login by simply setting an environment variable (your key may be stored in plain text): +If you are running your code in jupyter environments, you may want to log in by simply setting an environment variable (your key may be stored in plain text): ```python %env WANDB_API_KEY= ``` Here you can set wandb with `wandb`. -If you are debugging your model, you may want to **disable W&B** with `--wandb=disabled` and **none of the metrics** will be recorded.You can also disable **sync only** with `--wandb=offline` and enable it again with `--wandb=online` to upload to the cloud. Meanwhile, the parameter can be configured in the yaml file like: +If you are debugging your model, you may want to **disable W&B** with `--wandb=disabled`, and **none of the metrics** will be recorded. You can also disable **sync only** with `--wandb=offline` and enable it again with `--wandb=online` to upload to the cloud. Meanwhile, the parameter can be configured in the yaml file like: ```yaml wandb: online @@ -92,4 +97,4 @@ wandb: online The local files can be uploaded by executing `wandb sync` in the command line. -After configuration, you can throttle wandb prompts by defining environment variable `export WANDB_SILENT=false`. For more information, see [documentation](docs.wandb.ai). \ No newline at end of file +After configuration, you can throttle wandb prompts by defining the environment variable `export WANDB_SILENT=false`. For more information, see [documentation](docs.wandb.ai). \ No newline at end of file
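As a worked example of the wandb workflow described in the final patch, the mode switch and the later upload look as follows; this is a sketch, and the `offline-run-*` directory name simply follows W&B's usual local layout:

```bash
# Debug without uploading anything; metrics are still recorded locally
# under ./wandb
python run_textbox.py --model=BART --dataset=samsum --wandb=offline

# Switch cloud sync back on for subsequent runs
python run_textbox.py --model=BART --dataset=samsum --wandb=online

# Push the runs that were recorded while offline to the cloud
wandb sync wandb/offline-run-*
```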
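Similarly, the validation, saving, and early-stopping options documented in the trainer section combine into one command line; the values are illustrative, and the escaping follows the documentation's own examples:

```bash
# Validate every 500 training steps, keep the best plus the most recent
# checkpoint (max_save=2), and stop early after 8 validation rounds without
# improvement on the metrics named in metrics_for_best_model.
python run_textbox.py --model=BART --dataset=samsum \
    --valid_strategy=step --valid_steps=500 --max_save=2 \
    --stopping_steps=8 \
    --metrics_for_best_model=\[\'rouge-1\',\'rouge-w\'\] \
    --metrics=\[\'rouge\'\]
```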
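Finally, the keyword dictionaries accepted by the optimizer and scheduler wrapper can be supplied the same way; the `epsilon` and `warmup_steps` keys come from the documentation itself, while passing the dictionaries with this shell escaping is an assumption modeled on the metric examples above:

```bash
# Forward hyperparameters to the wrapped pytorch optimizer and scheduler
python run_textbox.py --model=BART --dataset=samsum \
    --optimizer_kwargs=\{\'epsilon\':1e-6\} \
    --scheduler_kwargs=\{\'warmup_steps\':100\}
```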