From 92464c7f248a3419acb56e8a44333edb4fc8d120 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Sun, 18 Dec 2022 20:56:33 +0800 Subject: [PATCH 01/10] wandb add_corpus and resume_training modify --- textbox/config/configurator.py | 6 +++++- textbox/quick_start/experiment.py | 3 ++- textbox/trainer/trainer.py | 11 ++++------- textbox/utils/argument_list.py | 3 ++- textbox/utils/dashboard.py | 8 -------- 5 files changed, 13 insertions(+), 18 deletions(-) diff --git a/textbox/config/configurator.py b/textbox/config/configurator.py index 0d1fa87e..21dd029b 100755 --- a/textbox/config/configurator.py +++ b/textbox/config/configurator.py @@ -262,6 +262,7 @@ def _set_default_parameters(self): self.setdefault('valid_strategy', 'epoch') self.setdefault('valid_steps', 1) self.setdefault('disable_tqdm', False) + self.setdefault('resume_training',True) self._simplify_parameter('optimizer') self._simplify_parameter('scheduler') self._simplify_parameter('src_lang') @@ -304,7 +305,10 @@ def check_load_type(self): if not self.final_config_dict.get('model_path', None): self.final_config_dict['load_type'] = 'from_scratch' elif os.path.exists(os.path.join(self.final_config_dict['model_path'], 'textbox_configuration.pt')): - self.final_config_dict['load_type'] = 'resume' + if self.final_config_dict.get('resume_all'): + self.final_config_dict['load_type'] = 'resume ' + else: + self.final_config_dict['load_type'] = 'resume' else: self.final_config_dict['load_type'] = 'from_pretrained' diff --git a/textbox/quick_start/experiment.py b/textbox/quick_start/experiment.py index 9c3314f0..00afd5e8 100644 --- a/textbox/quick_start/experiment.py +++ b/textbox/quick_start/experiment.py @@ -94,7 +94,8 @@ def _on_experiment_start(self, extended_config: Optional[dict]): self.valid_result: Optional[ResultType] = None self.test_result: Optional[ResultType] = None if config['load_type'] == 'resume': - self.trainer.resume_checkpoint(config['model_path']) + if config['resume_training']: + self.trainer.resume_checkpoint(config['model_path']) self.model.from_pretrained(config['model_path']) def _do_train_and_valid(self): diff --git a/textbox/trainer/trainer.py b/textbox/trainer/trainer.py index 241e1a6c..94e08a10 100755 --- a/textbox/trainer/trainer.py +++ b/textbox/trainer/trainer.py @@ -364,18 +364,15 @@ def save_checkpoint(self): def save_generated_text(self, generated_corpus: List[str], is_valid: bool = False): r"""Store the generated text by our model into `self.saved_text_filename`.""" saved_text_filename = self.saved_text_filename - if not is_valid: - self._summary_tracker.add_corpus('test', generated_corpus) - else: - path_to_save = self.saved_model_filename + '_epoch-' + str(self.timestamp.valid_epoch) - saved_text_filename = os.path.join(path_to_save, 'generation.txt') - os.makedirs(path_to_save, exist_ok=True) + path_to_save = self.saved_model_filename + '_epoch-' + str(self.timestamp.valid_epoch) + saved_text_filename = os.path.join(path_to_save, 'generation.txt') + os.makedirs(path_to_save, exist_ok=True) with open(saved_text_filename, 'w') as fout: for text in generated_corpus: fout.write(text + '\n') def resume_checkpoint(self, resume_dir: str): - r"""Load the model parameters information and training information. + r"""Load training information. Args: resume_dir: the checkpoint file (specific by `model_path`). 
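The hunks above change what "resume" means: `resume_checkpoint` now restores only training state (optimizer, scheduler, and epoch counters), and it is invoked only when the new `resume_training` option (default `True`) is set, while model weights are still loaded through `from_pretrained` in either case. A minimal sketch of the intended invocation, reusing the checkpoint path from the documentation added later in this series (the directory name is illustrative):

```bash
# Model and tokenizer are always loaded from model_path; trainer state
# (optimizer, scheduler, epoch counters) is restored only while
# resume_training keeps its default value of True.
python run_textbox.py --model=BART --dataset=samsum \
    --model_path=saved/BART-samsum-2022-Dec-18_20-57-47/checkpoint_best \
    --resume_training=True
```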
diff --git a/textbox/utils/argument_list.py b/textbox/utils/argument_list.py index 95c5218d..bd18fdb6 100644 --- a/textbox/utils/argument_list.py +++ b/textbox/utils/argument_list.py @@ -43,7 +43,8 @@ 'weight_decay', # common parameters 'accumulation_steps', # accelerator 'disable_tqdm', # tqdm - 'pretrain_task' # pretraining + 'pretrain_task', # pretraining + 'resume_training' ] evaluation_parameters = [ diff --git a/textbox/utils/dashboard.py b/textbox/utils/dashboard.py index 88e37ee6..3c43d93e 100644 --- a/textbox/utils/dashboard.py +++ b/textbox/utils/dashboard.py @@ -435,14 +435,6 @@ def add_scalar(self, tag: str, scalar_value: Union[float, int]): if self._is_local_main_process and not self.tracker_finished and self.axes is not None: wandb.log(info, step=self.axes.train_step, commit=False) - def add_corpus(self, tag: str, corpus: Iterable[str]): - r"""Add a corpus to summary.""" - if tag.startswith('valid'): - self._current_epoch._update_metrics({'generated_corpus': '\n'.join(corpus)}) - if self._is_local_main_process and not self.tracker_finished: - _corpus = wandb.Table(columns=[tag], data=pd.DataFrame(corpus)) - wandb.log({tag: _corpus}, step=self.axes.train_step) - root = None From dbfabb2cec834f592a46e916de77feedd44299ef Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Mon, 26 Dec 2022 17:46:34 +0800 Subject: [PATCH 02/10] wandb modify --- install.sh | 2 +- textbox/config/configurator.py | 1 + textbox/properties/overall.yaml | 1 + textbox/quick_start/experiment.py | 2 ++ textbox/utils/argument_list.py | 1 + 5 files changed, 6 insertions(+), 1 deletion(-) diff --git a/install.sh b/install.sh index 069500ea..82ff2fbf 100644 --- a/install.sh +++ b/install.sh @@ -35,7 +35,7 @@ esac echo "Installation may take a few minutes." 
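# Note: the pinned builds below (torch 1.12.1 against the CUDA 11.3 toolkit)
# assume a CUDA 11-capable NVIDIA driver; if your driver differs, substitute
# the matching cudatoolkit build when running this installer.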
echo -e "\033[0;32mInstalling torch ...\033[0m" -conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch +conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch echo -e "\033[0;32mInstalling requirements ...\033[0m" pip install -r requirements.txt diff --git a/textbox/config/configurator.py b/textbox/config/configurator.py index 21dd029b..e72be40e 100755 --- a/textbox/config/configurator.py +++ b/textbox/config/configurator.py @@ -263,6 +263,7 @@ def _set_default_parameters(self): self.setdefault('valid_steps', 1) self.setdefault('disable_tqdm', False) self.setdefault('resume_training',True) + self.setdefault('wandb', 'online') self._simplify_parameter('optimizer') self._simplify_parameter('scheduler') self._simplify_parameter('src_lang') diff --git a/textbox/properties/overall.yaml b/textbox/properties/overall.yaml index 684dd22b..0fa0d6be 100644 --- a/textbox/properties/overall.yaml +++ b/textbox/properties/overall.yaml @@ -5,6 +5,7 @@ seed: 2020 state: INFO reproducibility: True data_path: 'dataset/' +wandb: 'online' # training settings epochs: 50 diff --git a/textbox/quick_start/experiment.py b/textbox/quick_start/experiment.py index 00afd5e8..6e58bbe8 100644 --- a/textbox/quick_start/experiment.py +++ b/textbox/quick_start/experiment.py @@ -37,6 +37,8 @@ def __init__( config_dict: Optional[Dict[str, Any]] = None, ): self.config = Config(model, dataset, config_file_list, config_dict) + wandb_setting = 'wandb ' + self.config['wandb'] + os.system(wandb_setting) self.__extended_config = None self.accelerator = Accelerator(gradient_accumulation_steps=self.config['accumulation_steps']) diff --git a/textbox/utils/argument_list.py b/textbox/utils/argument_list.py index bd18fdb6..11f1b5ac 100644 --- a/textbox/utils/argument_list.py +++ b/textbox/utils/argument_list.py @@ -21,6 +21,7 @@ '_hyper_tuning', # hyper tuning 'multi_seed', # multiple random seed 'romanian_postprocessing', + 'wandb' ] training_parameters = [ From d07991efb1ee2ea0b3952ba3285b38f7cbe30949 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Mon, 26 Dec 2022 20:10:51 +0800 Subject: [PATCH 03/10] modify install.sh --- install.sh | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/install.sh b/install.sh index 069500ea..68d7b24c 100644 --- a/install.sh +++ b/install.sh @@ -35,7 +35,7 @@ esac echo "Installation may take a few minutes." echo -e "\033[0;32mInstalling torch ...\033[0m" -conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch +conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch echo -e "\033[0;32mInstalling requirements ...\033[0m" pip install -r requirements.txt @@ -75,16 +75,6 @@ chmod +rx $F2RExpDIR/WordNet-2.0.exc.db pip uninstall py-rouge pip install rouge > /dev/null -echo -e "\033[0;32mInstalling requirements (libxml) ...\033[0m" -if [[ "$OSTYPE" == "darwin"* ]]; then - brewinstall libxml2 cpanminus - cpanm --force XML::Parser -else - if [ -x "$(command -v apt-get)" ]; then sudo apt-get install libxml-parser-perl - elif [ -x "$(command -v yum)" ]; then sudo yum install -y "perl(XML::LibXML)" - else echo -e '\033[0;31mFailed to install libxml. 
See https://github.com/pltrdy/files2rouge/issues/9 for more information.\033[0m' && exit; - fi -fi echo -e "\033[0;32mInstalling requirements (transformers) ...\033[0m" git clone https://github.com/RUCAIBox/transformers.git From 02bd80c1d36e2fa4c607fa17d0b616292dfb4861 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 19:46:09 +0800 Subject: [PATCH 04/10] modify basic_training.md --- asset/basic_training.md | 123 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/asset/basic_training.md b/asset/basic_training.md index e69de29b..697111cf 100644 --- a/asset/basic_training.md +++ b/asset/basic_training.md @@ -0,0 +1,123 @@ +# Basic Training + + +## config +You may want to load your own configurations in different ways: +* cmd +* config files +* yaml + +These ways are equivalent. However, we recommend using **cmd** first, followed by **config files**. + + + +### cmd +You may want to change configurations in the command line like ``--xx=yy``. ``xx`` is the name of the parameters and ``yy`` is the corresponding value. for example: + +```bash +python run_textbox.py --model=BART --model_path=facebook/bart-base --epochs=1 +``` + +It's suitable for **a few temporary** modifications with cmd like: +* ``model`` +* ``model_path`` +* ``dataset`` +* ``epochs`` +* ... + +### config files + +You can also modify configurations in the local files: +```bash +python run_textbox.py ... --config_files +``` + +Every config file is an additional yaml files like: + +```yaml +efficient_methods: ['prompt-tuning'] +``` +It's suitable for **a large number of** modifications or **long-term** modification with cmd like: +* ``efficient_methods`` +* ``efficient_kwargs`` +* ... + +### yaml + +The original configurations are in the yaml files. You can check the values there, but it's not recommended to modify the files except for **permanently** modification the dataset. These files are in the path ``textbox\properties``: +* ``overall.yaml`` +* ``dataset\*.yaml`` +* ``model\*yaml`` + + +## trainer + +You can choose optimizer and scheduler through `optimizer=` and `scheduler=`. We provide a wrapper around **pytorch optimizer**, which means parameters like `epsilon` or `warmup_steps` can be specified with keyword dictionaries `optimizer_kwargs={'epsilon': ... }` and `scheduler_kwargs={'warmup_steps': ... }`. See [pytorch optimizer](https://pytorch.org/docs/stable/optim.html#algorithms) and [scheduler]() for a complete tutorial. + +Validation frequency is introduced to validate the model **at each specific batch-steps or epochs**. Specify `valid_strategy` (either `'step'` or `'epoch'`) and `valid_steps=` to adjust the pace. Specifically, traditional train-validate paradigm is a special case with `valid_strategy=epoch` and `valid_steps=1`. + +`max_save=` indicates **the maximal amount of saved files** (checkpoint and generated corpus during evaluation). `-1`: save every file, `0`: do not save any file, `1`: only save the file with best score, and `n`: save both the best and the last $n−1$ files. + +Evaluation metrics can be specified with `metrics` ([full list](evaluation.md)), and produce a dictionaries of results: + +```bash +python run_textbox.py ... --metrics=\[\'rouge\'\] +# results: { 'rouge-1': xxx, 'rouge-2': xxx, 'rouge-l': xxx, 'rouge-w': xxx, ... 
} +``` + +**Early stopping** can be configured with `metrics_for_best_model=`, which is used to calculate score, and `stopping_steps=`, which specifies the amount of validation steps: + +```bash +python run_textbox.py ... --stopping_steps=8 --metrics_for_best_model=\[\'rouge-1\', \'rouge-w\'\] +``` + +or yaml equivalent: + +```yaml +stopping_steps: 8 +metrics_for_best_model: ['rouge-1', 'rouge-w'] +``` + +Other commonly used parameters includes `epochs=` and `max_steps=` (indicating maximum iteration of epochs and batch steps, if you set `max_steps`, `epochs` will be invalid), `learning_rate=`, `train_batch_size=`, `weight_decay=`, and `grad_clip=`. + + + +### Partial Experiment + +You can run partial experiment with `do_train`, `do_valid`, `do_test`. You can test your pipeline and debug with `quick_test=` to load just a few examples. + +The following script loads the trained model from path `example` and conducts generation and evaluation without training and evaluation. +```bash +python run_textbox.py ... --do_train=False --do_valid=False \\ +--model_path=example --quick_test=16 +``` + + + + + + + + +## wandb + +If you are running your code in jupyter environments, you may want to login by simply setting an environment variable (your key may be stored in plain text): + +```python +%env WANDB_API_KEY= +``` +Here you can set wandb with `wandb`. + +If you are debugging your model, you may want to **disable W&B** with `--wandb=disabled` and **none of the metrics** will be recorded.You can also disable **sync only** with `--wandb=offline` and enable it again with `--wandb=online` to upload to the cloud. Meanwhile, the parameter can be configured in the yaml file like: + +```yaml +wandb: online +``` + + +The local files can be uploaded by executing `wandb sync` in the command line. + +After configuration, you can throttle wandb prompts by defining environment variable `export WANDB_SILENT=false`. For more information, see [documentation](docs.wandb.ai). + + + From 51b66af17a0aca2968980dd88b0a058d81b96bd7 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 19:48:07 +0800 Subject: [PATCH 05/10] modify basic_training.md --- asset/basic_training.md | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/asset/basic_training.md b/asset/basic_training.md index 697111cf..64eccff9 100644 --- a/asset/basic_training.md +++ b/asset/basic_training.md @@ -1,6 +1,4 @@ # Basic Training - - ## config You may want to load your own configurations in different ways: * cmd @@ -80,8 +78,6 @@ metrics_for_best_model: ['rouge-1', 'rouge-w'] Other commonly used parameters includes `epochs=` and `max_steps=` (indicating maximum iteration of epochs and batch steps, if you set `max_steps`, `epochs` will be invalid), `learning_rate=`, `train_batch_size=`, `weight_decay=`, and `grad_clip=`. - - ### Partial Experiment You can run partial experiment with `do_train`, `do_valid`, `do_test`. You can test your pipeline and debug with `quick_test=` to load just a few examples. @@ -92,13 +88,6 @@ python run_textbox.py ... 
--do_train=False --do_valid=False \\ --model_path=example --quick_test=16 ``` - - - - - - - ## wandb If you are running your code in jupyter environments, you may want to login by simply setting an environment variable (your key may be stored in plain text): @@ -114,10 +103,6 @@ If you are debugging your model, you may want to **disable W&B** with `--wandb=d wandb: online ``` - The local files can be uploaded by executing `wandb sync` in the command line. -After configuration, you can throttle wandb prompts by defining environment variable `export WANDB_SILENT=false`. For more information, see [documentation](docs.wandb.ai). - - - +After configuration, you can throttle wandb prompts by defining environment variable `export WANDB_SILENT=false`. For more information, see [documentation](docs.wandb.ai). \ No newline at end of file From 74ef83808a4635865cd9750e2c2979eb7866b26c Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 19:51:26 +0800 Subject: [PATCH 06/10] configurator fixed --- textbox/config/configurator.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/textbox/config/configurator.py b/textbox/config/configurator.py index e72be40e..839939a2 100755 --- a/textbox/config/configurator.py +++ b/textbox/config/configurator.py @@ -306,9 +306,6 @@ def check_load_type(self): if not self.final_config_dict.get('model_path', None): self.final_config_dict['load_type'] = 'from_scratch' elif os.path.exists(os.path.join(self.final_config_dict['model_path'], 'textbox_configuration.pt')): - if self.final_config_dict.get('resume_all'): - self.final_config_dict['load_type'] = 'resume ' - else: self.final_config_dict['load_type'] = 'resume' else: self.final_config_dict['load_type'] = 'from_pretrained' From 948fcde5d0a692afb252536410b80021d78278d9 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 20:06:31 +0800 Subject: [PATCH 07/10] basic_training modification and upload RNN.md --- asset/basic_training.md | 6 +----- instructions/RNN.md | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/asset/basic_training.md b/asset/basic_training.md index 64eccff9..aa919dd6 100644 --- a/asset/basic_training.md +++ b/asset/basic_training.md @@ -1,14 +1,10 @@ # Basic Training ## config -You may want to load your own configurations in different ways: +You may want to load your own configurations in equivalent ways: * cmd * config files * yaml -These ways are equivalent. However, we recommend using **cmd** first, followed by **config files**. - - - ### cmd You may want to change configurations in the command line like ``--xx=yy``. ``xx`` is the name of the parameters and ``yy`` is the corresponding value. for example: diff --git a/instructions/RNN.md b/instructions/RNN.md index e69de29b..eac13915 100644 --- a/instructions/RNN.md +++ b/instructions/RNN.md @@ -0,0 +1,16 @@ +## RNN + +You can train a RNN encoder-decoder with attention from scratch with this model. Three models are available: +* RNN +* GRU +* LSTM + +You can choose them through ``model=RNN``,``model=GRU``,``model=LSTM``. 
Meanwhile, you can check or modify the default parameters of the model in ``textbox/property/model/rnn.yaml(gru.yaml)(lstm.yaml)`` + +Example usage: + +```bash +python run_textbox.py \ + --model=RNN \ + --dataset=samsum +``` \ No newline at end of file From b60633f1e6971343c18fd8abc32501e612baa458 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 21:35:22 +0800 Subject: [PATCH 08/10] basic_training modify --- asset/basic_training.md | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/asset/basic_training.md b/asset/basic_training.md index aa919dd6..9f9566c6 100644 --- a/asset/basic_training.md +++ b/asset/basic_training.md @@ -21,7 +21,7 @@ It's suitable for **a few temporary** modifications with cmd like: ### config files -You can also modify configurations in the local files: +You can also modify configurations through the local files: ```bash python run_textbox.py ... --config_files ``` @@ -46,31 +46,22 @@ The original configurations are in the yaml files. You can check the values ther ## trainer -You can choose optimizer and scheduler through `optimizer=` and `scheduler=`. We provide a wrapper around **pytorch optimizer**, which means parameters like `epsilon` or `warmup_steps` can be specified with keyword dictionaries `optimizer_kwargs={'epsilon': ... }` and `scheduler_kwargs={'warmup_steps': ... }`. See [pytorch optimizer](https://pytorch.org/docs/stable/optim.html#algorithms) and [scheduler]() for a complete tutorial. +You can choose optimizer and scheduler through `optimizer=` and `scheduler=`. We provide a wrapper around **pytorch optimizer**, which means parameters like `epsilon` or `warmup_steps` can be specified with keyword dictionaries `optimizer_kwargs={'epsilon': ... }` and `scheduler_kwargs={'warmup_steps': ... }`. See [pytorch optimizer](https://pytorch.org/docs/stable/optim.html#algorithms) and scheduler for a complete tutorial. Validation frequency is introduced to validate the model **at each specific batch-steps or epochs**. Specify `valid_strategy` (either `'step'` or `'epoch'`) and `valid_steps=` to adjust the pace. Specifically, traditional train-validate paradigm is a special case with `valid_strategy=epoch` and `valid_steps=1`. `max_save=` indicates **the maximal amount of saved files** (checkpoint and generated corpus during evaluation). `-1`: save every file, `0`: do not save any file, `1`: only save the file with best score, and `n`: save both the best and the last $n−1$ files. -Evaluation metrics can be specified with `metrics` ([full list](evaluation.md)), and produce a dictionaries of results: +According to ``metrics_for_best_model``, thr score of current checkpoint will be calculated, and evaluatin metrics specified with ``metrics``([full list](evaluation.md)) will be chosen. **Early stopping** can be configured with `stopping_steps=` and score of every checkpoint. -```bash -python run_textbox.py ... --metrics=\[\'rouge\'\] -# results: { 'rouge-1': xxx, 'rouge-2': xxx, 'rouge-l': xxx, 'rouge-w': xxx, ... } -``` - -**Early stopping** can be configured with `metrics_for_best_model=`, which is used to calculate score, and `stopping_steps=`, which specifies the amount of validation steps: ```bash -python run_textbox.py ... --stopping_steps=8 --metrics_for_best_model=\[\'rouge-1\', \'rouge-w\'\] +python run_textbox.py ... 
--stopping_steps=8 \\ + --metrics_for_best_model=\[\'rouge-1\', \'rouge-w\'\] \\ + --metrics=\[\'rouge\'\] ``` -or yaml equivalent: - -```yaml -stopping_steps: 8 -metrics_for_best_model: ['rouge-1', 'rouge-w'] -``` +You can resume from a **previous checkpoint** through ``model_path=``.When you want to restrore **all trainer parameters** like optimizer and start_epoch, you can set ``resume_training=True``. Otherwise, only **model and tokenizer** will be loaded. Other commonly used parameters includes `epochs=` and `max_steps=` (indicating maximum iteration of epochs and batch steps, if you set `max_steps`, `epochs` will be invalid), `learning_rate=`, `train_batch_size=`, `weight_decay=`, and `grad_clip=`. From 6c1e961d52913072997266e89ca4a5f4cb1769ae Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 22:00:47 +0800 Subject: [PATCH 09/10] configurator delete space --- textbox/config/configurator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/textbox/config/configurator.py b/textbox/config/configurator.py index 839939a2..800c67dc 100755 --- a/textbox/config/configurator.py +++ b/textbox/config/configurator.py @@ -306,7 +306,7 @@ def check_load_type(self): if not self.final_config_dict.get('model_path', None): self.final_config_dict['load_type'] = 'from_scratch' elif os.path.exists(os.path.join(self.final_config_dict['model_path'], 'textbox_configuration.pt')): - self.final_config_dict['load_type'] = 'resume' + self.final_config_dict['load_type'] = 'resume' else: self.final_config_dict['load_type'] = 'from_pretrained' From 8cdeec0df1bf73dd58faa293a332cef0f6a26461 Mon Sep 17 00:00:00 2001 From: 1190303125 <2424390036@qq.com> Date: Tue, 27 Dec 2022 22:33:18 +0800 Subject: [PATCH 10/10] add some examples and check the spelling --- asset/basic_training.md | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/asset/basic_training.md b/asset/basic_training.md index 9f9566c6..b9f25353 100644 --- a/asset/basic_training.md +++ b/asset/basic_training.md @@ -1,6 +1,6 @@ # Basic Training ## config -You may want to load your own configurations in equivalent ways: +You may want to load your configurations in equivalent ways: * cmd * config files * yaml @@ -26,19 +26,19 @@ You can also modify configurations through the local files: python run_textbox.py ... --config_files ``` -Every config file is an additional yaml files like: +Every config file is an additional yaml file like: ```yaml efficient_methods: ['prompt-tuning'] ``` -It's suitable for **a large number of** modifications or **long-term** modification with cmd like: +It's suitable for **a large number of** modifications or **long-term** modifications with cmd like: * ``efficient_methods`` * ``efficient_kwargs`` * ... ### yaml -The original configurations are in the yaml files. You can check the values there, but it's not recommended to modify the files except for **permanently** modification the dataset. These files are in the path ``textbox\properties``: +The original configurations are in the yaml files. You can check the values there, but it's not recommended to modify the files except for **permanent** modification of the dataset. These files are in the path ``textbox\properties``: * ``overall.yaml`` * ``dataset\*.yaml`` * ``model\*yaml`` @@ -46,13 +46,13 @@ The original configurations are in the yaml files. You can check the values ther ## trainer -You can choose optimizer and scheduler through `optimizer=` and `scheduler=`. 
We provide a wrapper around **pytorch optimizer**, which means parameters like `epsilon` or `warmup_steps` can be specified with keyword dictionaries `optimizer_kwargs={'epsilon': ... }` and `scheduler_kwargs={'warmup_steps': ... }`. See [pytorch optimizer](https://pytorch.org/docs/stable/optim.html#algorithms) and scheduler for a complete tutorial. +You can choose an optimizer and scheduler through `optimizer=` and `scheduler=`. We provide a wrapper around **pytorch optimizer**, which means parameters like `epsilon` or `warmup_steps` can be specified with keyword dictionaries `optimizer_kwargs={'epsilon': ... }` and `scheduler_kwargs={'warmup_steps': ... }`. See [pytorch optimizer](https://pytorch.org/docs/stable/optim.html#algorithms) and scheduler for a complete tutorial. -Validation frequency is introduced to validate the model **at each specific batch-steps or epochs**. Specify `valid_strategy` (either `'step'` or `'epoch'`) and `valid_steps=` to adjust the pace. Specifically, traditional train-validate paradigm is a special case with `valid_strategy=epoch` and `valid_steps=1`. +Validation frequency is introduced to validate the model **at each specific batch-steps or epoch**. Specify `valid_strategy` (either `'step'` or `'epoch'`) and `valid_steps=` to adjust the pace. Specifically, the traditional train-validate paradigm is a special case with `valid_strategy=epoch` and `valid_steps=1`. -`max_save=` indicates **the maximal amount of saved files** (checkpoint and generated corpus during evaluation). `-1`: save every file, `0`: do not save any file, `1`: only save the file with best score, and `n`: save both the best and the last $n−1$ files. +`max_save=` indicates **the maximal amount of saved files** (checkpoint and generated corpus during evaluation). `-1`: save every file, `0`: do not save any file, `1`: only save the file with the best score, and `n`: save both the best and the last $n−1$ files. -According to ``metrics_for_best_model``, thr score of current checkpoint will be calculated, and evaluatin metrics specified with ``metrics``([full list](evaluation.md)) will be chosen. **Early stopping** can be configured with `stopping_steps=` and score of every checkpoint. +According to ``metrics_for_best_model``, the score of the current checkpoint will be calculated, and evaluation metrics specified with ``metrics``([full list](evaluation.md)) will be chosen. **Early stopping** can be configured with `stopping_steps=` and score of every checkpoint. ```bash @@ -61,30 +61,35 @@ python run_textbox.py ... --stopping_steps=8 \\ --metrics=\[\'rouge\'\] ``` -You can resume from a **previous checkpoint** through ``model_path=``.When you want to restrore **all trainer parameters** like optimizer and start_epoch, you can set ``resume_training=True``. Otherwise, only **model and tokenizer** will be loaded. +You can resume from a **previous checkpoint** through ``model_path=``.When you want to restore **all trainer parameters** like optimizer and start_epoch, you can set ``resume_training=True``. Otherwise, only **model and tokenizer** will be loaded. The script below will resume training from checkpoint in the path ``saved/BART-samsum-2022-Dec-18_20-57-47/checkpoint_best`` -Other commonly used parameters includes `epochs=` and `max_steps=` (indicating maximum iteration of epochs and batch steps, if you set `max_steps`, `epochs` will be invalid), `learning_rate=`, `train_batch_size=`, `weight_decay=`, and `grad_clip=`. 
+```bash +python run_textbox --model_path=saved/BART-samsum-2022-Dec-18_20-57-47/checkpoint_best \\ +--resume_training=True +``` + +Other commonly used parameters include `epochs=` and `max_steps=` (indicating maximum iteration of epochs and batch steps, if you set `max_steps`, `epochs` will be invalid), `learning_rate=`, `train_batch_size=`, `weight_decay=`, and `grad_clip=`. ### Partial Experiment -You can run partial experiment with `do_train`, `do_valid`, `do_test`. You can test your pipeline and debug with `quick_test=` to load just a few examples. +You can run the partial experiment with `do_train`, `do_valid`and `do_test`. You can test your pipeline and debug with `quick_test=` to load just a few examples. -The following script loads the trained model from path `example` and conducts generation and evaluation without training and evaluation. +The following script loads the trained model from a local path and conducts generation and evaluation without training and evaluation. ```bash -python run_textbox.py ... --do_train=False --do_valid=False \\ ---model_path=example --quick_test=16 +python run_textbox.py --model_path=saved/BART-samsum-2022-Dec-18_20-57-47/checkpoint_best \\ +--do_train=False --do_valid=False ``` ## wandb -If you are running your code in jupyter environments, you may want to login by simply setting an environment variable (your key may be stored in plain text): +If you are running your code in jupyter environments, you may want to log in by simply setting an environment variable (your key may be stored in plain text): ```python %env WANDB_API_KEY= ``` Here you can set wandb with `wandb`. -If you are debugging your model, you may want to **disable W&B** with `--wandb=disabled` and **none of the metrics** will be recorded.You can also disable **sync only** with `--wandb=offline` and enable it again with `--wandb=online` to upload to the cloud. Meanwhile, the parameter can be configured in the yaml file like: +If you are debugging your model, you may want to **disable W&B** with `--wandb=disabled`, and **none of the metrics** will be recorded. You can also disable **sync only** with `--wandb=offline` and enable it again with `--wandb=online` to upload to the cloud. Meanwhile, the parameter can be configured in the yaml file like: ```yaml wandb: online @@ -92,4 +97,4 @@ wandb: online The local files can be uploaded by executing `wandb sync` in the command line. -After configuration, you can throttle wandb prompts by defining environment variable `export WANDB_SILENT=false`. For more information, see [documentation](docs.wandb.ai). \ No newline at end of file +After configuration, you can throttle wandb prompts by defining the environment variable `export WANDB_SILENT=false`. For more information, see [documentation](docs.wandb.ai). \ No newline at end of file
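As a worked example of the wandb workflow described in the final patch, the mode switch and the later upload look as follows; this is a sketch, and the `offline-run-*` directory name simply follows W&B's usual local layout:

```bash
# Debug without uploading anything; metrics are still recorded locally
# under ./wandb
python run_textbox.py --model=BART --dataset=samsum --wandb=offline

# Switch cloud sync back on for subsequent runs
python run_textbox.py --model=BART --dataset=samsum --wandb=online

# Push the runs that were recorded while offline to the cloud
wandb sync wandb/offline-run-*
```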
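Similarly, the validation, saving, and early-stopping options documented in the trainer section combine into one command line; the values are illustrative, and the escaping follows the documentation's own examples:

```bash
# Validate every 500 training steps, keep the best plus the most recent
# checkpoint (max_save=2), and stop early after 8 validation rounds without
# improvement on the metrics named in metrics_for_best_model.
python run_textbox.py --model=BART --dataset=samsum \
    --valid_strategy=step --valid_steps=500 --max_save=2 \
    --stopping_steps=8 \
    --metrics_for_best_model=\[\'rouge-1\',\'rouge-w\'\] \
    --metrics=\[\'rouge\'\]
```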
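Finally, the keyword dictionaries accepted by the optimizer and scheduler wrapper can be supplied the same way; the `epsilon` and `warmup_steps` keys come from the documentation itself, while passing the dictionaries with this shell escaping is an assumption modeled on the metric examples above:

```bash
# Forward hyperparameters to the wrapped pytorch optimizer and scheduler
python run_textbox.py --model=BART --dataset=samsum \
    --optimizer_kwargs=\{\'epsilon\':1e-6\} \
    --scheduler_kwargs=\{\'warmup_steps\':100\}
```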