
Commit b8bea00
Fixed batch_size_per_device and batch_size misuse in LazyLLM (#377)
JingofXin authored Dec 3, 2024
1 parent 91fc585 commit b8bea00
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion lazyllm/components/finetune/llamafactory/sft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ per_device_train_batch_size: 1
per_device_eval_batch_size: 1
per_gpu_train_batch_size: null
per_gpu_eval_batch_size: null
gradient_accumulation_steps: 8
gradient_accumulation_steps: 1
eval_accumulation_steps: null
eval_delay: 0
learning_rate: 1.0e-04
Expand Down
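For context, LLaMA-Factory runs on the HuggingFace Trainer, where the effective optimization batch size is the product of the per-device batch size, the number of devices, and gradient_accumulation_steps; with the accumulation default dropped from 8 to 1, the batch size requested by the user is governed directly by the per-device value. A minimal sketch of that relationship (the num_gpus value below is hypothetical, not read from the repo):

# Illustrative sketch (not LazyLLM code): how the effective batch size is
# composed in HuggingFace-Trainer-style training, as used by LLaMA-Factory.
per_device_train_batch_size = 1  # value from sft.yaml
gradient_accumulation_steps = 1  # this commit changes the default from 8 to 1
num_gpus = 4                     # hypothetical data-parallel world size

effective_batch_size = (per_device_train_batch_size
                        * num_gpus
                        * gradient_accumulation_steps)
print(effective_batch_size)  # 4 for the values above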
2 changes: 1 addition & 1 deletion lazyllm/tools/train_service/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def train(self, train_config, token):
'num_train_epochs': train_config['num_epochs'],
'learning_rate': train_config['learning_rate'],
'lr_scheduler_type': train_config['lr_scheduler_type'],
'per_device_train_batch_size': train_config['batch_size'],
'per_device_train_batch_size': train_config['batch_size'] // train_config['num_gpus'],
'cutoff_len': train_config['cutoff_len'],
'lora_r': train_config['lora_r'],
'lora_alpha': train_config['lora_alpha'],
Expand Down
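The client.py change converts the user-facing global batch_size into a per-device value by dividing by the number of GPUs, so the requested batch is split across devices rather than replicated on each one. A rough sketch of that conversion, assuming batch_size is the global batch and num_gpus is the data-parallel degree (the helper name and the divisibility check are illustrative, not part of LazyLLM):

# Illustrative helper, not part of LazyLLM: it mirrors the
# train_config['batch_size'] // train_config['num_gpus'] expression
# introduced by this commit, with a guard for non-divisible values.
def per_device_batch_size(batch_size: int, num_gpus: int) -> int:
    if num_gpus <= 0:
        raise ValueError("num_gpus must be positive")
    if batch_size % num_gpus != 0:
        # Integer division would silently drop the remainder.
        raise ValueError("batch_size should be divisible by num_gpus")
    return batch_size // num_gpus

# Example: a global batch of 16 spread over 4 GPUs -> 4 samples per device.
assert per_device_batch_size(16, 4) == 4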
