From abbfa9161cd40448cecbd2ca07e47ed7af68427f Mon Sep 17 00:00:00 2001 From: shibing624 Date: Thu, 15 Jun 2023 14:46:07 +0800 Subject: [PATCH] update training pipeline. --- README.md | 15 +- requirements.txt | 1 + reward_modeling.py | 12 +- rl_training.py | 6 +- run_pretraining.ipynb | 235 --------- run_reward_modeling.ipynb | 227 --------- run_rl_training.ipynb | 205 -------- run_supervised_finetuning.ipynb | 240 --------- run_training_pipeline.ipynb | 865 ++++++++++++++++++++++++++++++++ 9 files changed, 883 insertions(+), 923 deletions(-) delete mode 100644 run_pretraining.ipynb delete mode 100644 run_reward_modeling.ipynb delete mode 100644 run_rl_training.ipynb delete mode 100644 run_supervised_finetuning.ipynb create mode 100644 run_training_pipeline.ipynb diff --git a/README.md b/README.md index 8a141ba..513aea9 100644 --- a/README.md +++ b/README.md @@ -76,13 +76,14 @@ python gradio_demo.py --model_type base_model_type --base_model path_to_llama_hf Training Stage: -| Stage | Introduction | Open In Colab | Python script | Shell script | -|:--------------------------------|:-------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------| -| Stage 1: Continue Pretraining | 增量预训练 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/shibing624/MedicalGPT/blob/main/run_pretraining.ipynb) | [pretraining.py](https://github.com/shibing624/MedicalGPT/blob/main/pretraining.py) | [run_pt.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_pt.sh) | -| Stage 2: Supervised Fine-tuning | 有监督微调 | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/shibing624/MedicalGPT/blob/main/run_supervised_finetuning.ipynb) | [supervised_finetuning.py](https://github.com/shibing624/MedicalGPT/blob/main/supervised_finetuning.py) | [run_sft.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_sft.sh) | -| Stage 3: Reward Modeling | 奖励模型建模 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/shibing624/MedicalGPT/blob/main/run_reward_modeling.ipynb) | [reward_modeling.py](https://github.com/shibing624/MedicalGPT/blob/main/reward_modeling.py) | [run_rm.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_rm.sh) | -| Stage 4: Reinforcement Learning | 强化学习 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/shibing624/MedicalGPT/blob/main/run_rl_training.ipynb) | [rl_training.py](https://github.com/shibing624/MedicalGPT/blob/main/rl_training.py) | [run_rl.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_rl.sh) | - +| Stage | Introduction | Python script | Shell script | +|:--------------------------------|:-------------|:------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------| +| Stage 1: Continue Pretraining | 增量预训练 | [pretraining.py](https://github.com/shibing624/MedicalGPT/blob/main/pretraining.py) | [run_pt.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_pt.sh) | +| Stage 2: Supervised Fine-tuning | 有监督微调 | [supervised_finetuning.py](https://github.com/shibing624/MedicalGPT/blob/main/supervised_finetuning.py) | [run_sft.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_sft.sh) | +| Stage 3: Reward Modeling | 奖励模型建模 | 
[reward_modeling.py](https://github.com/shibing624/MedicalGPT/blob/main/reward_modeling.py) | [run_rm.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_rm.sh) | +| Stage 4: Reinforcement Learning | 强化学习 | [rl_training.py](https://github.com/shibing624/MedicalGPT/blob/main/rl_training.py) | [run_rl.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_rl.sh) | + +提供完整四阶段串起来训练的pipeline:[run_training_pipeline.ipynb](https://github.com/shibing624/MedicalGPT/blob/main/run_training_pipeline.ipynb) ,其对应的colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/shibing624/MedicalGPT/blob/main/run_training_pipeline.ipynb) [训练参数说明wiki](https://github.com/shibing624/MedicalGPT/wiki/%E8%AE%AD%E7%BB%83%E7%BB%86%E8%8A%82%E8%AF%B4%E6%98%8E) diff --git a/requirements.txt b/requirements.txt index e01138e..0e813c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ loguru transformers>=4.30.1 +sentencepiece datasets tensorboard tqdm>=4.47.0 diff --git a/reward_modeling.py b/reward_modeling.py index df98191..30cfab8 100644 --- a/reward_modeling.py +++ b/reward_modeling.py @@ -514,7 +514,7 @@ def main(): logger.info(f"Raw datasets: {raw_datasets}") # Preprocessing the datasets - max_length = data_args.max_source_length + data_args.max_target_length + full_max_length = data_args.max_source_length + data_args.max_target_length def preprocess_reward_function(examples): """ @@ -560,8 +560,8 @@ def preprocess_reward_function(examples): desc="Running tokenizer on dataset", ) train_dataset = tokenized_dataset.filter( - lambda x: 0 < len(x['input_ids_rejected']) <= max_length and 0 < len( - x['input_ids_chosen']) <= max_length + lambda x: 0 < len(x['input_ids_rejected']) <= full_max_length and 0 < len( + x['input_ids_chosen']) <= full_max_length ) logger.debug(f"Num train_samples: {len(train_dataset)}") logger.debug("Tokenized training example:") @@ -588,8 +588,8 @@ def 
preprocess_reward_function(examples): desc="Running tokenizer on dataset", ) eval_dataset = tokenized_dataset.filter( - lambda x: 0 < len(x['input_ids_rejected']) <= max_length and 0 < len( - x['input_ids_chosen']) <= max_length + lambda x: 0 < len(x['input_ids_rejected']) <= full_max_length and 0 < len( + x['input_ids_chosen']) <= full_max_length ) logger.debug(f"Num eval_samples: {len(eval_dataset)}") logger.debug("Tokenized eval example:") @@ -614,7 +614,7 @@ def preprocess_reward_function(examples): tokenizer=tokenizer, compute_metrics=compute_metrics, data_collator=RewardDataCollatorWithPadding( - tokenizer=tokenizer, max_length=max_length, padding="max_length" + tokenizer=tokenizer, max_length=full_max_length, padding="max_length" ), ) diff --git a/rl_training.py b/rl_training.py index e768e55..359f55b 100644 --- a/rl_training.py +++ b/rl_training.py @@ -35,13 +35,13 @@ "llama": (LlamaForCausalLM, LlamaTokenizer), } - PROMPT_TEMPLATE = ( "Below is an instruction that describes a task. " "Write a response that appropriately completes the request.\n\n" "### Instruction:\n{instruction}\n\n### Response: " ) + @dataclass class ScriptArguments: """ @@ -169,7 +169,6 @@ def __post_init__(self): raise ValueError("You must specify a valid reward_model_name_or_path to run training.") - def print_trainable_parameters(model): """ Prints the number of trainable parameters in the model. 
@@ -202,6 +201,8 @@ def main(): logger.warning(f"Parse args: {args}") model_class, tokenizer_class = MODEL_CLASSES[args.model_type] + if args.model_type == 'bloom': + args.use_fast_tokenizer = True # Load tokenizer tokenizer_kwargs = { "cache_dir": args.cache_dir, @@ -359,7 +360,6 @@ def preprocess_function(examples): logger.debug("Tokenized training example:") # logger.debug(tokenizer.decode(train_dataset[0]['input_ids'])) - def collator(data): return dict((key, [d[key] for d in data]) for key in data[0]) diff --git a/run_pretraining.ipynb b/run_pretraining.ipynb deleted file mode 100644 index c9c81d9..0000000 --- a/run_pretraining.ipynb +++ /dev/null @@ -1,235 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "| Stage 1: Continue Pretraining | 增量预训练,在海量领域文本数据上继续预训练GPT模型,以注入领域知识 | [pretraining.py](https://github.com/shibing624/MedicalGPT/blob/main/pretraining.py) | [run_pt.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_pt.sh) | [run_pretraining.ipynb](https://github.com/shibing624/MedicalGPT/blob/main/run_pretraining.ipynb) | [Open In Colab](https://colab.research.google.com/github/shibing624/MedicalGPT/blob/main/run_pretraining.ipynb) |" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "# Stage 1: Continue Pretraining\n", - "\n", - "第一阶段:PT(Continue PreTraining)增量预训练,在海量领域文本数据上二次预训练LLaMA类GPT模型,以注入领域知识\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 说明:\n", - "以下 notebook/colab 代码为了快速验证训练代码可用,我们使用了小size的生成模型和小样本数据集,实际使用时,需要使用更大的模型和数据集,以获得更好的效果。\n", - "\n", - "1. 生成模型:使用的是Bloom的`bigscience/bloomz-560m`\n", - "2. 
数据集:PT阶段使用的是中文天龙八部小说部分文本和英文书籍部分文本,位于`data/pretrain`文件夹" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 配置运行环境\n", - "\n", - "本地执行可注释以下配置环境的命令,colab执行要打开注释,用于配置环境\n", - "\n", - "colab建议使用T4 GPU训练,设置方式:`代码执行程序 -> 更改运行时类型 -> 运行时类型:Python3,硬件加速器:GPU,GPU类型:T4 -> 保存`\n", - "\n", - "步骤:\n", - "1. 下载最新代码到本地\n", - "2. 安装依赖包\n", - "\n", - "依赖包如下,保证最新版本:\n", - "\n", - "```\n", - "loguru\n", - "transformers\n", - "datasets\n", - "tensorboard\n", - "tqdm\n", - "peft\n", - "trl\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "!git clone --depth 1 https://github.com/shibing624/MedicalGPT.git\n", - "%cd MedicalGPT\n", - "%ls\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 咱们开始吧\n", - "\n", - "训练步骤如下:\n", - "\n", - "1. 确认训练集\n", - "2. 执行训练脚本\n", - "\n", - "训练脚本的执行逻辑如下:\n", - "1. 导入依赖包\n", - "2. 设置参数\n", - "3. 定义各函数并加载训练集\n", - "4. 加载模型和tokenizer\n", - "5. 开始训练并评估\n", - "6. 
查看训练结果" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "en_article_tail500.txt tianlongbabu.txt\n" - ] - } - ], - "source": [ - "%ls ./data/pretrain/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "!python pretraining.py \\\n", - " --model_type bloom \\\n", - " --model_name_or_path bigscience/bloomz-560m \\\n", - " --train_file_dir ./data/pretrain \\\n", - " --validation_file_dir ./data/pretrain \\\n", - " --per_device_train_batch_size 1 \\\n", - " --per_device_eval_batch_size 1 \\\n", - " --do_train \\\n", - " --do_eval \\\n", - " --use_peft True \\\n", - " --seed 42 \\\n", - " --fp16 \\\n", - " --max_train_samples 10000 \\\n", - " --max_eval_samples 10 \\\n", - " --num_train_epochs 0.5 \\\n", - " --learning_rate 2e-4 \\\n", - " --warmup_ratio 0.05 \\\n", - " --weight_decay 0.01 \\\n", - " --logging_strategy steps \\\n", - " --logging_steps 10 \\\n", - " --eval_steps 50 \\\n", - " --evaluation_strategy steps \\\n", - " --save_steps 500 \\\n", - " --save_strategy steps \\\n", - " --save_total_limit 3 \\\n", - " --gradient_accumulation_steps 1 \\\n", - " --preprocessing_num_workers 1 \\\n", - " --block_size 1024 \\\n", - " --output_dir outputs-pt-v1 \\\n", - " --overwrite_output_dir \\\n", - " --ddp_timeout 30000 \\\n", - " --logging_first_step True \\\n", - " --target_modules all \\\n", - " --lora_rank 8 \\\n", - " --lora_alpha 16 \\\n", - " --lora_dropout 0.05 \\\n", - " --torch_dtype float16 \\\n", - " --device_map auto \\\n", - " --report_to tensorboard \\\n", - " --ddp_find_unused_parameters False \\\n", - " --gradient_checkpointing True" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after 
parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "adapter_config.json \u001B[0m\u001B[01;34mruns\u001B[0m/ train_results.json\n", - "adapter_model.bin special_tokens_map.json trainer_state.json\n", - "all_results.json tokenizer.json training_args.bin\n", - "eval_results.json tokenizer_config.json\n" - ] - } - ], - "source": [ - "%ls outputs-pt-v1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "模型训练结果:\n", - "- 使用lora训练模型,则保存的lora权重是`adapter_model.bin`, lora配置文件是`adapter_config.json`,合并到base model的方法见`merge_peft_adapter.py`\n", - "- 日志保存在`output_dir/runs`目录下,可以使用tensorboard查看,启动tensorboard方式如下:`tensorboard --logdir output_dir/runs --host 0.0.0.0 --port 8009`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "本节完。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "name": "python3", - "language": "python", - "display_name": "Python 3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "f34eed0bebedfc4b6ee51ced43d2c030fe3b92f13c149d072205ca200a67b1ec" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/run_reward_modeling.ipynb b/run_reward_modeling.ipynb deleted file mode 100644 index 2e89f1a..0000000 --- a/run_reward_modeling.ipynb +++ /dev/null @@ -1,227 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "| Stage 3: Reward Modeling | RM(Reward 
Model)奖励模型建模,构造人类偏好排序数据集,训练奖励模型,用来对齐人类偏好,主要是\"helpful, honest, harmless\"原则 | [reward_modeling.py](https://github.com/shibing624/MedicalGPT/blob/main/reward_modeling.py) | [run_rm.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_rm.sh) | [run_reward_modeling.ipynb](https://github.com/shibing624/MedicalGPT/blob/main/run_reward_modeling.ipynb) | [Open In Colab](https://colab.research.google.com/github/shibing624/MedicalGPT/blob/main/run_reward_modeling.ipynb) |" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "# Stage 3: Reward Modeling\n", - "\n", - "第三阶段:RM(Reward Model)奖励模型建模,构造人类偏好排序数据集,训练奖励模型,用来对齐人类偏好,主要是\"HHH\"原则,具体是\"helpful, honest, harmless\"\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 说明:\n", - "以下 notebook/colab 代码为了快速验证训练代码可用,我们使用了小size的生成模型和小样本数据集,实际使用时,需要使用更大的模型和数据集,以获得更好的效果。\n", - "\n", - "1. 生成模型:使用的是Bloom的`bigscience/bloomz-560m`\n", - "2. 数据集:RM阶段使用的是医疗reward数据,抽样了500条,位于`data/reward`文件夹" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 配置运行环境\n", - "\n", - "本地执行可注释以下配置环境的命令,colab执行要打开注释,用于配置环境\n", - "\n", - "colab建议使用T4 GPU训练,设置方式:`代码执行程序 -> 更改运行时类型 -> 运行时类型:Python3,硬件加速器:GPU,GPU类型:T4 -> 保存`\n", - "\n", - "步骤:\n", - "1. 下载最新代码到本地\n", - "2. 安装依赖包\n", - "\n", - "依赖包如下,保证最新版本:\n", - "\n", - "```\n", - "loguru\n", - "transformers\n", - "datasets\n", - "tensorboard\n", - "tqdm\n", - "peft\n", - "trl\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "!git clone --depth 1 https://github.com/shibing624/MedicalGPT.git\n", - "%cd MedicalGPT\n", - "%ls\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 咱们开始吧\n", - "\n", - "训练步骤如下:\n", - "\n", - "1. 确认训练集\n", - "2. 执行训练脚本\n", - "\n", - "训练脚本的执行逻辑如下:\n", - "1. 导入依赖包\n", - "2. 设置参数\n", - "3. 定义各函数并加载训练集\n", - "4. 加载模型和tokenizer\n", - "5. 开始训练并评估\n", - "6. 
查看训练结果" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "test.json\n" - ] - } - ], - "source": [ - "%ls ./data/reward/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "!python reward_modeling.py \\\n", - " --model_type bloom \\\n", - " --model_name_or_path bigscience/bloomz-560m \\\n", - " --train_file_dir ./data/reward \\\n", - " --validation_file_dir ./data/reward \\\n", - " --per_device_train_batch_size 1 \\\n", - " --per_device_eval_batch_size 1 \\\n", - " --do_train \\\n", - " --use_peft True \\\n", - " --seed 42 \\\n", - " --max_train_samples 1000 \\\n", - " --max_eval_samples 10 \\\n", - " --num_train_epochs 1 \\\n", - " --learning_rate 2e-5 \\\n", - " --warmup_ratio 0.05 \\\n", - " --weight_decay 0.001 \\\n", - " --logging_strategy steps \\\n", - " --logging_steps 10 \\\n", - " --eval_steps 50 \\\n", - " --evaluation_strategy steps \\\n", - " --save_steps 500 \\\n", - " --save_strategy steps \\\n", - " --save_total_limit 3 \\\n", - " --max_source_length 256 \\\n", - " --max_target_length 256 \\\n", - " --output_dir outputs-rm-v1 \\\n", - " --overwrite_output_dir \\\n", - " --ddp_timeout 30000 \\\n", - " --logging_first_step True \\\n", - " --target_modules all \\\n", - " --lora_rank 8 \\\n", - " --lora_alpha 16 \\\n", - " --lora_dropout 0.05 \\\n", - " --torch_dtype float32 \\\n", - " --device_map auto \\\n", - " --report_to tensorboard \\\n", - " --ddp_find_unused_parameters False \\\n", - " --remove_unused_columns False \\\n", - " --gradient_checkpointing True" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "adapter_config.json \u001B[0m\u001B[01;34mruns\u001B[0m/ train_results.json\n", - "adapter_model.bin special_tokens_map.json 
trainer_state.json\n", - "all_results.json tokenizer.json training_args.bin\n", - "eval_results.json tokenizer_config.json vocab.txt\n" - ] - } - ], - "source": [ - "%ls outputs-rm-v1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "模型训练结果:\n", - "- 使用lora训练模型,则保存的lora权重是`adapter_model.bin`, lora配置文件是`adapter_config.json`,合并到base model的方法见`merge_peft_adapter.py`\n", - "- 日志保存在`output_dir/runs`目录下,可以使用tensorboard查看,启动tensorboard方式如下:`tensorboard --logdir output_dir/runs --host 0.0.0.0 --port 8009`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "本节完。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "py38xm", - "language": "python", - "name": "py38kernel" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "f34eed0bebedfc4b6ee51ced43d2c030fe3b92f13c149d072205ca200a67b1ec" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/run_rl_training.ipynb b/run_rl_training.ipynb deleted file mode 100644 index 2eeff30..0000000 --- a/run_rl_training.ipynb +++ /dev/null @@ -1,205 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "| Stage 4: Reinforcement Learning | 基于人类反馈的强化学习(RLHF),用奖励模型来训练SFT模型,生成模型使用奖励或惩罚来更新其策略,以便生成更高质量、更符合人类偏好的文本 | [rl_training.py](https://github.com/shibing624/MedicalGPT/blob/main/rl_training.py) | [run_rl.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_rl.sh) | [run_rl_training.ipynb](https://github.com/shibing624/MedicalGPT/blob/main/run_rl_training.ipynb) | [Open In Colab](https://colab.research.google.com/github/shibing624/MedicalGPT/blob/main/run_rl_training.ipynb) |" - ], - 
"metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Stage 4: Reinforcement Learning Training\n", - "\n", - "第四阶段:RL(Reinforcement Learning)基于人类反馈的强化学习(RLHF),用奖励模型来训练SFT模型,生成模型使用奖励或惩罚来更新其策略,以便生成更高质量、更符合人类偏好的文本\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 说明:\n", - "以下 notebook/colab 代码为了快速验证训练代码可用,我们使用了小size的生成模型、奖励模型和小样本数据集,实际使用时,需要使用更大的模型和数据集,以获得更好的效果。\n", - "\n", - "1. 生成模型:使用的是Bloom的`bigscience/bloomz-560m`\n", - "2. 奖励模型:使用的是`OpenAssistant/reward-model-deberta-v3-large-v2`\n", - "3. 数据集:RL阶段的数据可以复用SFT的数据集,使用的是Belle的1千条抽样数据,位于`data/finetune`文件夹" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 配置运行环境\n", - "\n", - "本地执行可注释以下配置环境的命令,colab执行要打开注释,用于配置环境\n", - "\n", - "colab建议使用T4 GPU训练,设置方式:`代码执行程序 -> 更改运行时类型 -> 运行时类型:Python3,硬件加速器:GPU,GPU类型:T4 -> 保存`\n", - "\n", - "步骤:\n", - "1. 下载最新代码到本地\n", - "2. 安装依赖包\n", - "\n", - "依赖包如下,保证最新版本:\n", - "\n", - "```\n", - "loguru\n", - "transformers\n", - "datasets\n", - "tensorboard\n", - "tqdm\n", - "peft\n", - "trl\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!git clone --depth 1 https://github.com/shibing624/MedicalGPT.git\n", - "%cd MedicalGPT\n", - "%ls\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 咱们开始吧\n", - "\n", - "训练步骤如下:\n", - "\n", - "1. 确认训练集\n", - "2. 执行训练脚本\n", - "\n", - "训练脚本的执行逻辑如下:\n", - "1. 导入依赖包\n", - "2. 设置参数\n", - "3. 定义各函数并加载训练集\n", - "4. 加载生成模型和tokenizer,加载奖励模型和其tokenizer\n", - "5. 开始训练并评估\n", - "6. 
查看训练结果" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "%ls ./data/finetune/" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "!python rl_training.py \\\n", - " --model_type bloom \\\n", - " --model_name_or_path bigscience/bloomz-560m \\\n", - " --reward_model_name_or_path OpenAssistant/reward-model-deberta-v3-large-v2 \\\n", - " --torch_dtype float16 \\\n", - " --device_map auto \\\n", - " --train_file_dir ./data/finetune \\\n", - " --validation_file_dir ./data/finetune \\\n", - " --batch_size 2 \\\n", - " --max_source_length 256 \\\n", - " --max_target_length 256 \\\n", - " --max_train_samples 1000 \\\n", - " --use_peft True \\\n", - " --lora_rank 8 \\\n", - " --lora_alpha 16 \\\n", - " --lora_dropout 0.05 \\\n", - " --do_train \\\n", - " --max_steps 100 \\\n", - " --learning_rate 1e-5 \\\n", - " --save_steps 50 \\\n", - " --output_dir outputs-rl-v1 \\\n", - " --early_stopping True \\\n", - " --target_kl 0.1 \\\n", - " --reward_baseline 0.0" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "adapter_config.json pytorch_model.bin tokenizer.json \u001B[0m\u001B[01;34mtrl\u001B[0m/\n", - "adapter_model.bin special_tokens_map.json tokenizer_config.json\n" - ] - } - ], - "source": [ - "%ls outputs-rl-v1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "模型训练结果:\n", - "- 使用lora训练模型,则保存的lora权重是`adapter_model.bin`, lora配置文件是`adapter_config.json`,合并到base model的方法见`merge_peft_adapter.py`\n", - "- 日志保存在`output_dir/trl`目录下,可以使用tensorboard查看,启动tensorboard方式如下:`tensorboard --logdir output_dir/trl --host 0.0.0.0 --port 8009`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "本节完。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "name": "python3", - "language": "python", - "display_name": "Python 3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "f34eed0bebedfc4b6ee51ced43d2c030fe3b92f13c149d072205ca200a67b1ec" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/run_supervised_finetuning.ipynb b/run_supervised_finetuning.ipynb deleted file mode 100644 index 23fe88b..0000000 --- a/run_supervised_finetuning.ipynb +++ /dev/null @@ -1,240 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "| Stage 2: Supervised Fine-tuning | 有监督微调,构造指令微调数据集,在预训练模型基础上做指令精调,以对齐指令意图 | [supervised_finetuning.py](https://github.com/shibing624/MedicalGPT/blob/main/supervised_finetuning.py) | [run_sft.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_sft.sh) | [run_supervised_finetuning.ipynb](https://github.com/shibing624/MedicalGPT/blob/main/run_supervised_finetuning.ipynb) | [Open In Colab](https://colab.research.google.com/github/shibing624/MedicalGPT/blob/main/run_supervised_finetuning.ipynb) |" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "# Stage 2: Supervised FineTuning\n", - "\n", - "第二阶段:SFT(Supervised Fine-tuning)有监督微调,构造指令微调数据集,在预训练模型基础上做指令精调,以对齐指令意图\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 说明:\n", - "以下 notebook/colab 代码为了快速验证训练代码可用,我们使用了小size的生成模型和小样本数据集,实际使用时,需要使用更大的模型和数据集,以获得更好的效果。\n", - "\n", - "1. 生成模型:使用的是Bloom的`bigscience/bloomz-560m`\n", - "2. 
数据集:SFT阶段使用的是使用的是Belle的1千条抽样数据,位于`data/finetune`文件夹" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 配置运行环境\n", - "\n", - "本地执行可注释以下配置环境的命令,colab执行要打开注释,用于配置环境\n", - "\n", - "colab建议使用T4 GPU训练,设置方式:`代码执行程序 -> 更改运行时类型 -> 运行时类型:Python3,硬件加速器:GPU,GPU类型:T4 -> 保存`\n", - "\n", - "步骤:\n", - "1. 下载最新代码到本地\n", - "2. 安装依赖包\n", - "\n", - "依赖包如下,保证最新版本:\n", - "\n", - "```\n", - "loguru\n", - "transformers\n", - "datasets\n", - "tensorboard\n", - "tqdm\n", - "peft\n", - "trl\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "start_time": "2023-06-08T09:56:32.966593Z", - "end_time": "2023-06-08T09:56:32.973366Z" - } - }, - "outputs": [], - "source": [ - "!git clone --depth 1 https://github.com/shibing624/MedicalGPT.git\n", - "%cd MedicalGPT\n", - "%ls\n", - "!pip install -r requirements.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 咱们开始吧\n", - "\n", - "训练步骤如下:\n", - "\n", - "1. 确认训练集\n", - "2. 执行训练脚本\n", - "\n", - "训练脚本的执行逻辑如下:\n", - "1. 导入依赖包\n", - "2. 设置参数\n", - "3. 定义各函数并加载训练集\n", - "4. 加载模型和tokenizer\n", - "5. 开始训练并评估\n", - "6. 
查看训练结果" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Belle_open_source_1k.json\n" - ] - } - ], - "source": [ - "%ls ./data/finetune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "!python supervised_finetuning.py \\\n", - " --model_type bloom \\\n", - " --model_name_or_path bigscience/bloomz-560m \\\n", - " --train_file_dir ./data/finetune \\\n", - " --validation_file_dir ./data/finetune \\\n", - " --per_device_train_batch_size 1 \\\n", - " --per_device_eval_batch_size 1 \\\n", - " --do_train \\\n", - " --do_eval \\\n", - " --use_peft True \\\n", - " --fp16 \\\n", - " --max_train_samples 1000 \\\n", - " --max_eval_samples 10 \\\n", - " --num_train_epochs 1 \\\n", - " --learning_rate 2e-5 \\\n", - " --warmup_ratio 0.05 \\\n", - " --weight_decay 0.05 \\\n", - " --logging_strategy steps \\\n", - " --logging_steps 10 \\\n", - " --eval_steps 50 \\\n", - " --evaluation_strategy steps \\\n", - " --save_steps 500 \\\n", - " --save_strategy steps \\\n", - " --save_total_limit 3 \\\n", - " --gradient_accumulation_steps 1 \\\n", - " --preprocessing_num_workers 1 \\\n", - " --max_source_length 256 \\\n", - " --max_target_length 256 \\\n", - " --output_dir outputs-sft-v1 \\\n", - " --overwrite_output_dir \\\n", - " --ddp_timeout 30000 \\\n", - " --logging_first_step True \\\n", - " --target_modules all \\\n", - " --lora_rank 8 \\\n", - " --lora_alpha 16 \\\n", - " --lora_dropout 0.05 \\\n", - " --torch_dtype float16 \\\n", - " --device_map auto \\\n", - " --report_to tensorboard \\\n", - " --ddp_find_unused_parameters False \\\n", - " --gradient_checkpointing True" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got 
forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", - "adapter_config.json \u001B[0m\u001B[01;34mruns\u001B[0m/ train_results.json\n", - "adapter_model.bin special_tokens_map.json trainer_state.json\n", - "all_results.json tokenizer.json training_args.bin\n", - "eval_results.json tokenizer_config.json\n" - ] - } - ], - "source": [ - "%ls outputs-sft-v1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "模型训练结果:\n", - "- 使用lora训练模型,则保存的lora权重是`adapter_model.bin`, lora配置文件是`adapter_config.json`,合并到base model的方法见`merge_peft_adapter.py`\n", - "- 日志保存在`output_dir/runs`目录下,可以使用tensorboard查看,启动tensorboard方式如下:`tensorboard --logdir output_dir/runs --host 0.0.0.0 --port 8009`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "本节完。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "name": "python3", - "language": "python", - "display_name": "Python 3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "f34eed0bebedfc4b6ee51ced43d2c030fe3b92f13c149d072205ca200a67b1ec" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/run_training_pipeline.ipynb b/run_training_pipeline.ipynb new file mode 100644 index 0000000..17a918f --- /dev/null +++ b/run_training_pipeline.ipynb @@ -0,0 +1,865 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Training Pipeline\n", + 
"[run_training_pipeline.ipynb](https://github.com/shibing624/MedicalGPT/blob/main/run_training_pipeline.ipynb) | [Open In Colab](https://colab.research.google.com/github/shibing624/MedicalGPT/blob/main/run_training_pipeline.ipynb)" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Stage 1: Continue Pretraining\n", + "\n", + "第一阶段:PT(Continue PreTraining)增量预训练,在海量领域文本数据上二次预训练GPT模型,以注入领域知识\n", + "\n", + "| Stage 1: Continue Pretraining | [pretraining.py](https://github.com/shibing624/MedicalGPT/blob/main/pretraining.py) | [run_pt.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_pt.sh) |" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 说明:\n", + "以下 notebook/colab 代码为了快速验证训练代码可用,我们使用了小size的生成模型和小样本数据集,实际使用时,需要使用更大的模型和数据集,以获得更好的效果。\n", + "\n", + "1. 生成模型:使用的是Bloom的`bigscience/bloomz-560m`\n", + "2. 数据集:PT阶段使用的是中文天龙八部小说部分文本和英文书籍部分文本,位于`data/pretrain`文件夹" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 配置运行环境\n", + "\n", + "本地执行可注释以下配置环境的命令,colab执行要打开注释,用于配置环境\n", + "\n", + "colab建议使用T4 GPU训练,设置方式:`代码执行程序 -> 更改运行时类型 -> 运行时类型:Python3,硬件加速器:GPU,GPU类型:T4 -> 保存`\n", + "\n", + "步骤:\n", + "1. 下载最新代码到本地\n", + "2. 安装依赖包\n", + "\n", + "依赖包如下,保证最新版本:\n", + "\n", + "```\n", + "loguru\n", + "transformers\n", + "sentencepiece\n", + "datasets\n", + "tensorboard\n", + "tqdm\n", + "peft\n", + "trl\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!git clone --depth 1 https://github.com/shibing624/MedicalGPT.git\n", + "%cd MedicalGPT\n", + "%ls\n", + "!pip install -r requirements.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stage1 咱们开始吧\n", + "\n", + "训练步骤如下:\n", + "\n", + "1. 确认训练集\n", + "2. 执行训练脚本\n", + "\n", + "训练脚本的执行逻辑如下:\n", + "1. 导入依赖包\n", + "2. 设置参数\n", + "3. 定义各函数并加载训练集\n", + "4. 加载模型和tokenizer\n", + "5. 
开始训练并评估\n", + "6. 查看训练结果\n", + "\n", + "**以下参数可以根据你的GPU实际情况修改,当前参数是根据Colab的T4单卡GPU(16GB显存)配置的**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%ls ./data/pretrain/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "!python pretraining.py \\\n", + " --model_type bloom \\\n", + " --model_name_or_path bigscience/bloomz-560m \\\n", + " --train_file_dir ./data/pretrain \\\n", + " --validation_file_dir ./data/pretrain \\\n", + " --per_device_train_batch_size 3 \\\n", + " --per_device_eval_batch_size 3 \\\n", + " --do_train \\\n", + " --do_eval \\\n", + " --use_peft True \\\n", + " --seed 42 \\\n", + " --fp16 \\\n", + " --max_train_samples 10000 \\\n", + " --max_eval_samples 10 \\\n", + " --num_train_epochs 1 \\\n", + " --learning_rate 2e-4 \\\n", + " --warmup_ratio 0.05 \\\n", + " --weight_decay 0.01 \\\n", + " --logging_strategy steps \\\n", + " --logging_steps 10 \\\n", + " --eval_steps 50 \\\n", + " --evaluation_strategy steps \\\n", + " --save_steps 500 \\\n", + " --save_strategy steps \\\n", + " --save_total_limit 3 \\\n", + " --gradient_accumulation_steps 1 \\\n", + " --preprocessing_num_workers 1 \\\n", + " --block_size 1024 \\\n", + " --output_dir outputs-pt-v1 \\\n", + " --overwrite_output_dir \\\n", + " --ddp_timeout 30000 \\\n", + " --logging_first_step True \\\n", + " --target_modules all \\\n", + " --lora_rank 8 \\\n", + " --lora_alpha 16 \\\n", + " --lora_dropout 0.05 \\\n", + " --torch_dtype float16 \\\n", + " --device_map auto \\\n", + " --report_to tensorboard \\\n", + " --ddp_find_unused_parameters False \\\n", + " --gradient_checkpointing True" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%ls outputs-pt-v1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "模型训练结果:\n", + "- 
使用lora训练模型,则保存的lora权重是`adapter_model.bin`, lora配置文件是`adapter_config.json`,合并到base model的方法见`merge_peft_adapter.py`\n", + "- 日志保存在`output_dir/runs`目录下,可以使用tensorboard查看,启动tensorboard方式如下:`tensorboard --logdir output_dir/runs --host 0.0.0.0 --port 8009`" + ] + }, + { + "cell_type": "markdown", + "source": [ + "lora模型权重合并到base model,合并后的模型保存在`--output_dir`目录下,合并方法如下:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "!python merge_peft_adapter.py --model_type bloom \\\n", + " --base_model_name_or_path bigscience/bloomz-560m --peft_model_path outputs-pt-v1 --output_dir merged-pt/" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%ls -lh merged-pt/" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%cat merged-pt/config.json" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Stage1 增量预训练完成。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "start_time": "2023-06-15T13:56:17.032821Z", + "end_time": "2023-06-15T13:56:17.081153Z" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Stage 2: Supervised FineTuning\n", + "\n", + "第二阶段:SFT(Supervised Fine-tuning)有监督微调,构造指令微调数据集,在预训练模型基础上做指令精调,以对齐指令意图\n", + "\n", + "| Stage 2: Supervised Fine-tuning | [supervised_finetuning.py](https://github.com/shibing624/MedicalGPT/blob/main/supervised_finetuning.py) | [run_sft.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_sft.sh) |" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "#### 说明:\n", + "以下 notebook/colab 代码为了快速验证训练代码可用,我们使用了小size的生成模型和小样本数据集,实际使用时,需要使用更大的模型和数据集,以获得更好的效果。\n", + "\n", + "1. 
生成模型:使用的是Bloom的`bigscience/bloomz-560m` 或者 Stage1得到的预训练模型\n", + "2. 数据集:SFT阶段使用的是Belle的1千条抽样数据,位于`data/finetune`文件夹" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Stage2 咱们开始吧\n", + "\n", + "训练步骤如下:\n", + "\n", + "1. 确认训练集\n", + "2. 执行训练脚本\n", + "\n", + "训练脚本的执行逻辑如下:\n", + "1. 导入依赖包\n", + "2. 设置参数\n", + "3. 定义各函数并加载训练集\n", + "4. 加载模型和tokenizer\n", + "5. 开始训练并评估\n", + "6. 查看训练结果" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%ls ./data/finetune" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "start_time": "2023-06-15T13:58:38.778132Z", + "end_time": "2023-06-15T13:58:38.966506Z" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "!python supervised_finetuning.py \\\n", + " --model_type bloom \\\n", + " --model_name_or_path merged-pt \\\n", + " --train_file_dir ./data/finetune \\\n", + " --validation_file_dir ./data/finetune \\\n", + " --per_device_train_batch_size 4 \\\n", + " --per_device_eval_batch_size 4 \\\n", + " --do_train \\\n", + " --do_eval \\\n", + " --use_peft True \\\n", + " --fp16 \\\n", + " --max_train_samples 1000 \\\n", + " --max_eval_samples 10 \\\n", + " --num_train_epochs 1 \\\n", + " --learning_rate 2e-5 \\\n", + " --warmup_ratio 0.05 \\\n", + " --weight_decay 0.05 \\\n", + " --logging_strategy steps \\\n", + " --logging_steps 10 \\\n", + " --eval_steps 50 \\\n", + " --evaluation_strategy steps \\\n", + " --save_steps 500 \\\n", + " --save_strategy steps \\\n", + " --save_total_limit 3 \\\n", + " --gradient_accumulation_steps 1 \\\n", + " --preprocessing_num_workers 1 \\\n", + " --max_source_length 256 \\\n", + " --max_target_length 256 \\\n", + " --output_dir outputs-sft-v1 \\\n", + " --overwrite_output_dir \\\n", + " --ddp_timeout 30000 \\\n", + " --logging_first_step True \\\n", + " --target_modules all \\\n", + " --lora_rank 8 
\\\n", + " --lora_alpha 16 \\\n", + " --lora_dropout 0.05 \\\n", + " --torch_dtype float16 \\\n", + " --device_map auto \\\n", + " --report_to tensorboard \\\n", + " --ddp_find_unused_parameters False \\\n", + " --gradient_checkpointing True" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%ls outputs-sft-v1" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "模型训练结果:\n", + "- 使用lora训练模型,则保存的lora权重是`adapter_model.bin`, lora配置文件是`adapter_config.json`,合并到base model的方法见`merge_peft_adapter.py`\n", + "- 日志保存在`output_dir/runs`目录下,可以使用tensorboard查看,启动tensorboard方式如下:`tensorboard --logdir output_dir/runs --host 0.0.0.0 --port 8009`" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "lora模型权重合并到base model,合并后的模型保存在`--output_dir`目录下,合并方法如下:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "!python merge_peft_adapter.py --model_type bloom \\\n", + " --base_model_name_or_path merged-pt --peft_model_path outputs-sft-v1 --output_dir merged-sft/" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%ls -lh merged-sft/" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%cat merged-sft/config.json" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "Stage2 SFT训练完成。" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "start_time": "2023-06-15T14:07:40.731186Z", + "end_time": "2023-06-15T14:07:40.752635Z" + } + } + }, + { + "cell_type": "markdown", + 
"source": [ + "# Stage 3: Reward Modeling\n", + "\n", + "第三阶段:RM(Reward Model)奖励模型建模,构造人类偏好排序数据集,训练奖励模型,用来对齐人类偏好,主要是\"HHH\"原则,具体是\"helpful, honest, harmless\"\n", + "\n", + "| Stage 3: Reward Modeling | [reward_modeling.py](https://github.com/shibing624/MedicalGPT/blob/main/reward_modeling.py) | [run_rm.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_rm.sh) |" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "#### 说明:\n", + "以下 notebook/colab 代码为了快速验证训练代码可用,我们使用了小size的生成模型和小样本数据集,实际使用时,需要使用更大的模型和数据集,以获得更好的效果。\n", + "\n", + "1. 生成模型:使用的是Bloom的`bigscience/bloomz-560m` 或者 Stage2得到的SFT模型\n", + "2. 数据集:RM阶段使用的是医疗reward数据,抽样了500条,位于`data/reward`文件夹" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Stage3 咱们开始吧\n", + "\n", + "训练步骤如下:\n", + "\n", + "1. 确认训练集\n", + "2. 执行训练脚本\n", + "\n", + "训练脚本的执行逻辑如下:\n", + "1. 导入依赖包\n", + "2. 设置参数\n", + "3. 定义各函数并加载训练集\n", + "4. 加载模型和tokenizer\n", + "5. 开始训练并评估\n", + "6. 
查看训练结果" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%ls ./data/reward/" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "!python reward_modeling.py \\\n", + " --model_type bloom \\\n", + " --model_name_or_path merged-sft \\\n", + " --train_file_dir ./data/reward \\\n", + " --validation_file_dir ./data/reward \\\n", + " --per_device_train_batch_size 3 \\\n", + " --per_device_eval_batch_size 1 \\\n", + " --do_train \\\n", + " --use_peft True \\\n", + " --seed 42 \\\n", + " --max_train_samples 1000 \\\n", + " --max_eval_samples 10 \\\n", + " --num_train_epochs 1 \\\n", + " --learning_rate 2e-5 \\\n", + " --warmup_ratio 0.05 \\\n", + " --weight_decay 0.001 \\\n", + " --logging_strategy steps \\\n", + " --logging_steps 10 \\\n", + " --eval_steps 50 \\\n", + " --evaluation_strategy steps \\\n", + " --save_steps 500 \\\n", + " --save_strategy steps \\\n", + " --save_total_limit 3 \\\n", + " --max_source_length 256 \\\n", + " --max_target_length 256 \\\n", + " --output_dir outputs-rm-v1 \\\n", + " --overwrite_output_dir \\\n", + " --ddp_timeout 30000 \\\n", + " --logging_first_step True \\\n", + " --target_modules all \\\n", + " --lora_rank 8 \\\n", + " --lora_alpha 16 \\\n", + " --lora_dropout 0.05 \\\n", + " --torch_dtype float32 \\\n", + " --device_map auto \\\n", + " --report_to tensorboard \\\n", + " --ddp_find_unused_parameters False \\\n", + " --remove_unused_columns False \\\n", + " --gradient_checkpointing True" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%ls -lh outputs-rm-v1" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "模型训练结果:\n", + "- 使用lora训练模型,则保存的lora权重是`adapter_model.bin`, lora配置文件是`adapter_config.json`,合并到base 
model的方法见`merge_peft_adapter.py`\n", + "- 日志保存在`output_dir/runs`目录下,可以使用tensorboard查看,启动tensorboard方式如下:`tensorboard --logdir output_dir/runs --host 0.0.0.0 --port 8009`" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "lora模型权重合并到base model,合并后的模型保存在`--output_dir`目录下,合并方法如下:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "!python merge_peft_adapter.py --model_type bloom \\\n", + " --base_model_name_or_path merged-sft --peft_model_path outputs-rm-v1 --output_dir merged-rm/" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%ls -lh merged-rm/" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%cat merged-rm/config.json" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "Stage3 奖励建模第一次训练完成。" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "start_time": "2023-06-15T14:12:09.464881Z", + "end_time": "2023-06-15T14:12:09.472414Z" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "# Stage 4: Reinforcement Learning Training\n", + "\n", + "第四阶段:RL(Reinforcement Learning)基于人类反馈的强化学习(RLHF),用奖励模型来训练SFT模型,生成模型使用奖励或惩罚来更新其策略,以便生成更高质量、更符合人类偏好的文本\n", + "\n", + "| Stage 4: Reinforcement Learning | [rl_training.py](https://github.com/shibing624/MedicalGPT/blob/main/rl_training.py) | [run_rl.sh](https://github.com/shibing624/MedicalGPT/blob/main/run_rl.sh) |\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "#### 说明:\n", + "以下 notebook/colab 代码为了快速验证训练代码可用,我们使用了小size的生成模型、奖励模型和小样本数据集,实际使用时,需要使用更大的模型和数据集,以获得更好的效果。\n", 
+ "\n", + "1. 生成模型:使用的是Bloom的`bigscience/bloomz-560m` 或者 Stage2得到的SFT模型\n", + "2. 奖励模型:使用的是`OpenAssistant/reward-model-deberta-v3-large-v2` 或者 Stage3得到的BERT类或者GPT类奖励模型\n", + "3. 数据集:RL阶段的数据可以复用SFT的数据集,使用的是Belle的1千条抽样数据,位于`data/finetune`文件夹" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Stage4 咱们开始吧\n", + "\n", + "训练步骤如下:\n", + "\n", + "1. 确认训练集\n", + "2. 执行训练脚本\n", + "\n", + "训练脚本的执行逻辑如下:\n", + "1. 导入依赖包\n", + "2. 设置参数\n", + "3. 定义各函数并加载训练集\n", + "4. 加载生成模型和tokenizer,加载奖励模型和其tokenizer\n", + "5. 开始训练并评估\n", + "6. 查看训练结果\n", + "\n", + "以下参数可以根据你的GPU实际情况修改,当前参数是根据Colab的T4单卡GPU(16GB显存)配置的。" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%ls ./data/finetune/" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "!python rl_training.py \\\n", + " --model_type bloom \\\n", + " --model_name_or_path merged-sft \\\n", + " --reward_model_name_or_path merged-rm \\\n", + " --torch_dtype float16 \\\n", + " --device_map auto \\\n", + " --train_file_dir ./data/finetune \\\n", + " --validation_file_dir ./data/finetune \\\n", + " --batch_size 4 \\\n", + " --max_source_length 256 \\\n", + " --max_target_length 256 \\\n", + " --max_train_samples 1000 \\\n", + " --use_peft True \\\n", + " --lora_rank 8 \\\n", + " --lora_alpha 16 \\\n", + " --lora_dropout 0.05 \\\n", + " --do_train \\\n", + " --max_steps 64 \\\n", + " --learning_rate 1e-5 \\\n", + " --save_steps 50 \\\n", + " --output_dir outputs-rl-v1 \\\n", + " --early_stopping True \\\n", + " --target_kl 0.1 \\\n", + " --reward_baseline 0.0" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%ls -lh outputs-rl-v1" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + 
"模型训练结果:\n", + "- 使用lora训练模型,则保存的lora权重是`adapter_model.bin`, lora配置文件是`adapter_config.json`,合并到base model的方法见`merge_peft_adapter.py`\n", + "- 日志保存在`output_dir/trl`目录下,可以使用tensorboard查看,启动tensorboard方式如下:`tensorboard --logdir output_dir/trl --host 0.0.0.0 --port 8009`" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "lora模型权重合并到base model,合并后的模型保存在`--output_dir`目录下,合并方法如下:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "!python merge_peft_adapter.py --model_type bloom \\\n", + " --base_model_name_or_path merged-sft --peft_model_path outputs-rl-v1 --output_dir merged-rl/" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%ls -lh merged-rl/" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "%cat merged-rl/config.json" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "Stage4 RL第一次训练完成。\n", + "\n", + "**至此一个完整的4阶段训练流程演示完成。**" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "实际操作中Stage3和Stage4可以反复多次,直到RL得到的最后模型满足评估要求。\n", + "\n", + "RLHF过程可以把SFT模型当成一个初始化模型,RM模型当做指导老师,使用RL(PPO)调教SFT模型生成指导老师最满意的结果,如果小学老师满意了,我们就再训练一个中学老师,继续指导,中学老师满意了,就训练一个大学老师,这样不断迭代,使得生成模型的质量达到甚至超过人工撰写的天花板。\n", + "\n", + "RLHF训练不易,此项目希望抛砖引玉,给大家一种实现的方法和参考,共同促进中文开源LLM发展。" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "完。" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "name": "python3", + "language": "python", + "display_name": "Python 3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "vscode": { + "interpreter": { + "hash": "f34eed0bebedfc4b6ee51ced43d2c030fe3b92f13c149d072205ca200a67b1ec" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}