Marker-Inc-Korea · e7217 · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024 · Dec 26, 2024
diff --git a/autorag/data/parse/run.py b/autorag/data/parse/run.py
@@ -107,7 +107,16 @@ def run_parser(
 				module_params,
 			)
 		)
-	list(map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths)))
+
+	_files = {}
+	for result, filepath in zip(results, filepaths):
+		_files[filepath].append(result) if filepath in _files.keys() else _files.update(
+			{filepath: [result]}
+		)
+	# Concatenate the results that share an output path and write each group to a single Parquet file.
+	for filepath, value in _files.items():
+		pd.concat(value).to_parquet(filepath, index=False)
+
 	filenames = list(map(lambda x: os.path.basename(x), filepaths))
 
 	summary_df = pd.DataFrame(
@@ -121,8 +130,9 @@ def run_parser(
 	summary_df.to_csv(os.path.join(project_dir, "summary.csv"), index=False)
 
 	# concat all parquet files here if not all_files.
+	_filepaths = list(_files.keys())
 	if not all_files:
-		dataframes = [pd.read_parquet(file) for file in filepaths]
+		dataframes = [pd.read_parquet(file) for file in _filepaths]
 		combined_df = pd.concat(dataframes, ignore_index=True)
 		combined_df.to_parquet(
 			os.path.join(project_dir, "parsed_result.parquet"), index=False