From bfd1f5baf0835dac110daa548c775c860821aa16 Mon Sep 17 00:00:00 2001 From: Billccx_server Date: Tue, 5 Sep 2023 10:38:04 +0800 Subject: [PATCH] change validation_file_dir path --- pretraining.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pretraining.py b/pretraining.py index 7e5b2ee..0038450 100644 --- a/pretraining.py +++ b/pretraining.py @@ -458,8 +458,8 @@ def group_texts(examples): data_files["train"] = train_data_files if data_args.validation_file_dir is not None and os.path.exists(data_args.validation_file_dir): eval_data_files = glob(f'{data_args.validation_file_dir}/**/*.txt', recursive=True) + glob( - f'{data_args.train_file_dir}/**/*.json', recursive=True) + glob( - f'{data_args.train_file_dir}/**/*.jsonl', recursive=True) + f'{data_args.validation_file_dir}/**/*.json', recursive=True) + glob( + f'{data_args.validation_file_dir}/**/*.jsonl', recursive=True) logger.info(f"eval files: {eval_data_files}") data_files["validation"] = eval_data_files # Train data files must be same type, e.g. all txt or all jsonl