From a6e96470da5e49c5aa5d19e7fbdbaec14370db82 Mon Sep 17 00:00:00 2001
From: shibing624
Date: Wed, 17 Apr 2024 15:00:35 +0800
Subject: [PATCH] update orpo shell.

---
Reviewer notes (free text between the "---" separator and the diffstat; not
part of the commit message or of the diff):

  * This patch creates run_orpo.sh, a one-command launcher: it sets
    CUDA_VISIBLE_DEVICES=0,1 for the process and runs orpo_training.py with
    a fixed set of command-line flags (model bigscience/bloomz-560m, data
    read from ./data/reward for both training and validation, results
    written to outputs-orpo-bloom-v1, cache under ./cache).
  * The meaning and accepted values of every --flag are defined by
    orpo_training.py, which is not included in this patch; confirm against
    that script before changing any value.
  * All paths are relative, so the script presumably must be started from
    the directory that contains orpo_training.py -- TODO confirm.
  * The file is created with mode 100644 (not executable) and has no
    shebang line, so it has to be run through an interpreter, e.g.
    "bash run_orpo.sh".

 run_orpo.sh | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 run_orpo.sh

diff --git a/run_orpo.sh b/run_orpo.sh
new file mode 100644
index 0000000..108dca1
--- /dev/null
+++ b/run_orpo.sh
@@ -0,0 +1,30 @@
+CUDA_VISIBLE_DEVICES=0,1 python orpo_training.py \
+    --model_type bloom \
+    --model_name_or_path bigscience/bloomz-560m \
+    --train_file_dir ./data/reward \
+    --validation_file_dir ./data/reward \
+    --per_device_train_batch_size 4 \
+    --per_device_eval_batch_size 1 \
+    --do_train \
+    --do_eval \
+    --use_peft True \
+    --max_train_samples 1000 \
+    --max_eval_samples 10 \
+    --max_steps 100 \
+    --eval_steps 20 \
+    --save_steps 50 \
+    --max_source_length 128 \
+    --max_target_length 128 \
+    --output_dir outputs-orpo-bloom-v1 \
+    --target_modules all \
+    --lora_rank 8 \
+    --lora_alpha 16 \
+    --lora_dropout 0.05 \
+    --torch_dtype float16 \
+    --fp16 True \
+    --device_map auto \
+    --report_to tensorboard \
+    --remove_unused_columns False \
+    --gradient_checkpointing True \
+    --orpo_beta 0.1 \
+    --cache_dir ./cache