From c957ea3831924652495fb79ccfb1a46b655e7fcf Mon Sep 17 00:00:00 2001 From: Qiyuan Gong Date: Tue, 14 May 2024 13:43:59 +0800 Subject: [PATCH] Add axolotl main support and axolotl Llama-3-8B QLoRA example (#10984) * Support axolotl main (796a085). * Add axolotl Llama-3-8B QLoRA example. * Change `sequence_len` to 256 for alpaca, and revert `lora_r` value. * Add example to quick_start. --- .../doc/LLM/Quickstart/axolotl_quickstart.md | 96 ++++++++++++++++++- .../GPU/LLM-Finetuning/axolotl/README.md | 67 ++++++++++++- .../LLM-Finetuning/axolotl/llama3-qlora.yml | 72 ++++++++++++++ .../GPU/LLM-Finetuning/axolotl/lora.yml | 4 +- .../GPU/LLM-Finetuning/axolotl/qlora.yml | 4 +- 5 files changed, 233 insertions(+), 10 deletions(-) create mode 100644 python/llm/example/GPU/LLM-Finetuning/axolotl/llama3-qlora.yml diff --git a/docs/readthedocs/source/doc/LLM/Quickstart/axolotl_quickstart.md b/docs/readthedocs/source/doc/LLM/Quickstart/axolotl_quickstart.md index afbdc7c9234..8c3a28e18dd 100644 --- a/docs/readthedocs/source/doc/LLM/Quickstart/axolotl_quickstart.md +++ b/docs/readthedocs/source/doc/LLM/Quickstart/axolotl_quickstart.md @@ -134,7 +134,7 @@ Modify LoRA parameters, such as `lora_r` and `lora_alpha`, etc. adapter: lora lora_model_dir: -lora_r: 16 +lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 lora_target_linear: true @@ -178,7 +178,7 @@ Modify QLoRA parameters, such as `lora_r` and `lora_alpha`, etc. adapter: qlora lora_model_dir: -lora_r: 16 +lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 lora_target_modules: @@ -198,6 +198,98 @@ In Axolotl v0.4.0, you can use `train.py` instead of `-m axolotl.cli.train` or ` accelerate launch train.py qlora.yml ``` +### 3. Finetune Llama-3-8B (Experimental) + +Warning: this section will install axolotl main ([796a085](https://github.com/OpenAccess-AI-Collective/axolotl/tree/796a085b2f688f4a5efe249d95f53ff6833bf009)) for new features, e.g., Llama-3-8B. 
+ +#### 3.1 Install Axolotl main in conda + +Axolotl main has lots of new dependencies. Please set up a new conda env for this version. + +```cmd +conda create -n llm python=3.11 +conda activate llm +# install axolotl main +git clone https://github.com/OpenAccess-AI-Collective/axolotl +cd axolotl && git checkout 796a085 +pip install -e . +# the command below installs intel_extension_for_pytorch==2.1.10+xpu by default +pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +# install transformers etc +pip install accelerate==0.23.0 +# to avoid https://github.com/OpenAccess-AI-Collective/axolotl/issues/1544 +pip install datasets==2.15.0 +pip install transformers==4.37.0 +``` + +Configure accelerate and oneAPI according to [Set Environment Variables](#22-set-environment-variables). + +#### 3.2 Alpaca QLoRA + +Based on [axolotl Llama-3 QLoRA example](https://github.com/OpenAccess-AI-Collective/axolotl/blob/main/examples/llama-3/qlora.yml). + +Prepare `llama3-qlora.yml` for QLoRA finetuning. You can download a template from GitHub. + +```cmd +wget https://raw.githubusercontent.com/intel-analytics/ipex-llm/main/python/llm/example/GPU/LLM-Finetuning/axolotl/llama3-qlora.yml +``` + +**If you are using an offline model and dataset in a local environment**, please modify the model path and dataset path in `llama3-qlora.yml`. Otherwise, keep them unchanged. + +```yaml +# Please change to local path if model is offline, e.g., /path/to/model/Meta-Llama-3-8B +base_model: meta-llama/Meta-Llama-3-8B +datasets: + # Please change to local path if dataset is offline, e.g., /path/to/dataset/alpaca_2k_test + - path: aaditya/alpaca_subset_1 + type: alpaca +``` + +Modify QLoRA parameters, such as `lora_r` and `lora_alpha`, etc. 
+ +```yaml +adapter: qlora +lora_model_dir: + +sequence_len: 256 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: +``` + +```cmd +accelerate launch finetune.py llama3-qlora.yml +``` + +You can also use `train.py` instead of `-m axolotl.cli.train` or `finetune.py`. + +```cmd +accelerate launch train.py llama3-qlora.yml +``` + +Expected output + +```cmd +{'loss': 0.237, 'learning_rate': 1.2254711850265387e-06, 'epoch': 3.77} +{'loss': 0.6068, 'learning_rate': 1.1692453482951115e-06, 'epoch': 3.77} +{'loss': 0.2926, 'learning_rate': 1.1143322458989303e-06, 'epoch': 3.78} +{'loss': 0.2475, 'learning_rate': 1.0607326072295087e-06, 'epoch': 3.78} +{'loss': 0.1531, 'learning_rate': 1.008447144232094e-06, 'epoch': 3.79} +{'loss': 0.1799, 'learning_rate': 9.57476551396197e-07, 'epoch': 3.79} +{'loss': 0.2724, 'learning_rate': 9.078215057463868e-07, 'epoch': 3.79} +{'loss': 0.2534, 'learning_rate': 8.594826668332445e-07, 'epoch': 3.8} +{'loss': 0.3388, 'learning_rate': 8.124606767246579e-07, 'epoch': 3.8} +{'loss': 0.3867, 'learning_rate': 7.667561599972505e-07, 'epoch': 3.81} +{'loss': 0.2108, 'learning_rate': 7.223697237281668e-07, 'epoch': 3.81} +{'loss': 0.0792, 'learning_rate': 6.793019574868775e-07, 'epoch': 3.82} +``` + ## Troubleshooting #### TypeError: PosixPath diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md b/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md index 773202e6974..7a019e7f01b 100644 --- a/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md +++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/README.md @@ -69,13 +69,13 @@ This example shows how to run [Alpaca LoRA training](https://github.com/tloen/al Based on [axolotl Llama-2 LoRA example](https://github.com/OpenAccess-AI-Collective/axolotl/blob/v0.4.0/examples/llama-2/lora.yml). 
-``` +```bash accelerate launch finetune.py lora.yml ``` In v0.4.0, you can also use `train.py` instead of `-m axolotl.cli.train` or `finetune.py`. -``` +```bash accelerate launch train.py lora.yml ``` @@ -85,13 +85,13 @@ Based on [axolotl Llama-2 QLoRA example](https://github.com/OpenAccess-AI-Collec Modify parameters in `qlora.yml` based on your requirements. Then, launch finetuning with the following command. -``` +```bash accelerate launch finetune.py qlora.yml ``` In v0.4.0, you can also use `train.py` instead of `-m axolotl.cli.train` or `finetune.py`. -``` +```bash accelerate launch train.py qlora.yml ``` @@ -113,3 +113,62 @@ Output in console {'loss': 0.9651, 'learning_rate': 0.00019189578116202307, 'epoch': 0.54} {'loss': 0.9067, 'learning_rate': 0.00019107766703887764, 'epoch': 0.56} ``` + +### 4. Finetune Llama-3-8B (Experimental) + +Warning: this section will install axolotl main ([796a085](https://github.com/OpenAccess-AI-Collective/axolotl/tree/796a085b2f688f4a5efe249d95f53ff6833bf009)) for new features, e.g., Llama-3-8B. + +#### 4.1 Install Axolotl main in conda + +Axolotl main has lots of new dependencies. Please set up a new conda env for this version. + +```bash +conda create -n llm python=3.11 +conda activate llm +# install axolotl main +git clone https://github.com/OpenAccess-AI-Collective/axolotl +cd axolotl && git checkout 796a085 +pip install -e . +# the command below installs intel_extension_for_pytorch==2.1.10+xpu by default +pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +# install transformers etc +pip install accelerate==0.23.0 +# to avoid https://github.com/OpenAccess-AI-Collective/axolotl/issues/1544 +pip install datasets==2.15.0 +pip install transformers==4.37.0 +``` + +Configure accelerate and oneAPI according to [Configures OneAPI environment variables and accelerate](#2-configures-oneapi-environment-variables-and-accelerate). 
+ +#### 4.2 Alpaca QLoRA + +Based on [axolotl Llama-3 QLoRA example](https://github.com/OpenAccess-AI-Collective/axolotl/blob/main/examples/llama-3/qlora.yml). + +Modify parameters in `llama3-qlora.yml` based on your requirements. Then, launch finetuning with the following command. + +```bash +accelerate launch finetune.py llama3-qlora.yml +``` + +You can also use `train.py` instead of `-m axolotl.cli.train` or `finetune.py`. + +```bash +accelerate launch train.py llama3-qlora.yml +``` + +Expected output + +```bash +{'loss': 0.237, 'learning_rate': 1.2254711850265387e-06, 'epoch': 3.77} +{'loss': 0.6068, 'learning_rate': 1.1692453482951115e-06, 'epoch': 3.77} +{'loss': 0.2926, 'learning_rate': 1.1143322458989303e-06, 'epoch': 3.78} +{'loss': 0.2475, 'learning_rate': 1.0607326072295087e-06, 'epoch': 3.78} +{'loss': 0.1531, 'learning_rate': 1.008447144232094e-06, 'epoch': 3.79} +{'loss': 0.1799, 'learning_rate': 9.57476551396197e-07, 'epoch': 3.79} +{'loss': 0.2724, 'learning_rate': 9.078215057463868e-07, 'epoch': 3.79} +{'loss': 0.2534, 'learning_rate': 8.594826668332445e-07, 'epoch': 3.8} +{'loss': 0.3388, 'learning_rate': 8.124606767246579e-07, 'epoch': 3.8} +{'loss': 0.3867, 'learning_rate': 7.667561599972505e-07, 'epoch': 3.81} +{'loss': 0.2108, 'learning_rate': 7.223697237281668e-07, 'epoch': 3.81} +{'loss': 0.0792, 'learning_rate': 6.793019574868775e-07, 'epoch': 3.82} +``` diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/llama3-qlora.yml b/python/llm/example/GPU/LLM-Finetuning/axolotl/llama3-qlora.yml new file mode 100644 index 00000000000..401f4c10445 --- /dev/null +++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/llama3-qlora.yml @@ -0,0 +1,72 @@ +# This file is copied from https://github.com/OpenAccess-AI-Collective/axolotl/blob/main/examples/llama-3/qlora.yml +base_model: meta-llama/Meta-Llama-3-8B +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: 
aaditya/alpaca_subset_1 + type: alpaca +dataset_prepared_path: +val_set_size: 0 +output_dir: ./qlora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 256 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 1 +num_epochs: 4 +# paged_adamw_32bit is not supported +# due to bitsandbytes issue https://github.com/TimDettmers/bitsandbytes/issues/1180 +# optimizer: paged_adamw_32bit +optimizer: adamw_torch +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: true +fp16: false +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +# flash_attention is not supported +flash_attention: false + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + pad_token: "<|end_of_text|>" diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/lora.yml b/python/llm/example/GPU/LLM-Finetuning/axolotl/lora.yml index 20cd8f73568..b77612c7476 100644 --- a/python/llm/example/GPU/LLM-Finetuning/axolotl/lora.yml +++ b/python/llm/example/GPU/LLM-Finetuning/axolotl/lora.yml @@ -14,13 +14,13 @@ dataset_prepared_path: val_set_size: 0.05 output_dir: ./lora-out -sequence_len: 4096 +sequence_len: 256 sample_packing: true pad_to_sequence_len: true adapter: lora lora_model_dir: -lora_r: 16 +lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 lora_target_linear: true diff --git a/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml b/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml index b76eeae2d5d..b18efd4ed28 100644 --- a/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml +++ 
b/python/llm/example/GPU/LLM-Finetuning/axolotl/qlora.yml @@ -18,11 +18,11 @@ output_dir: ./qlora-out adapter: qlora lora_model_dir: -sequence_len: 4096 +sequence_len: 256 sample_packing: true pad_to_sequence_len: true -lora_r: 16 +lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 lora_target_modules: