diff --git a/src/promptflow/promptflow/batch/_batch_engine.py b/src/promptflow/promptflow/batch/_batch_engine.py index c84c6a9653e..6c52fbc00af 100644 --- a/src/promptflow/promptflow/batch/_batch_engine.py +++ b/src/promptflow/promptflow/batch/_batch_engine.py @@ -250,8 +250,21 @@ def _copy_previous_run_result( previous_run_info.root_run_id = run_id previous_run_info.parent_run_id = run_id - # Load previous node run info and remove aggregation nodes in case it is loaded into node run info + # Load previous node run info previous_node_run_infos = resume_from_run_storage.load_node_run_info_for_line(i) + + # In storage, aggregation nodes are persisted with filenames similar to regular nodes. + # Currently we read regular node run records by filename in the node artifacts folder, + # which may lead to load records of aggregation nodes at the same time, which is not intended. + # E.g, aggregation-node/000000000.jsonl will be treated as the node_run_info of the first line: + # node_artifacts/ + # ├─ non-aggregation-node/ + # │ ├─ 000000000.jsonl + # │ ├─ 000000001.jsonl + # │ ├─ 000000002.jsonl + # ├─ aggregation-node/ + # │ ├─ 000000000.jsonl + # So we filter out aggregation nodes since line records should not contain any info about them. previous_node_run_infos = [ run_info for run_info in previous_node_run_infos if run_info.node not in aggregation_nodes ]