From 0210776120ead5fbefe46041dcd687a070625a51 Mon Sep 17 00:00:00 2001 From: MZ Zhuo Date: Wed, 16 Aug 2023 17:07:18 +0800 Subject: [PATCH] Update pretraining.py: fix eval file path for the *.txt glob --- pretraining.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pretraining.py b/pretraining.py index e1210a5..9cdf11e 100644 --- a/pretraining.py +++ b/pretraining.py @@ -457,7 +457,7 @@ def group_texts(examples): raise ValueError(f"train files must be same type, e.g. all txt or all jsonl, but got {types}") data_files["train"] = train_data_files if data_args.validation_file_dir is not None and os.path.exists(data_args.validation_file_dir): - eval_data_files = glob(f'{data_args.train_file_dir}/**/*.txt', recursive=True) + glob( + eval_data_files = glob(f'{data_args.validation_file_dir}/**/*.txt', recursive=True) + glob( f'{data_args.train_file_dir}/**/*.json', recursive=True) + glob( f'{data_args.train_file_dir}/**/*.jsonl', recursive=True) logger.info(f"eval files: {eval_data_files}")