From 95bcad8e455fc257754c68a332b5722329f580cc Mon Sep 17 00:00:00 2001 From: Min Shi Date: Wed, 27 Mar 2024 14:48:32 +0800 Subject: [PATCH] add comment for excluding aggregation nodes in previous node run infos --- src/promptflow/promptflow/batch/_batch_engine.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/promptflow/promptflow/batch/_batch_engine.py b/src/promptflow/promptflow/batch/_batch_engine.py index c84c6a9653e..6c52fbc00af 100644 --- a/src/promptflow/promptflow/batch/_batch_engine.py +++ b/src/promptflow/promptflow/batch/_batch_engine.py @@ -250,8 +250,21 @@ def _copy_previous_run_result( previous_run_info.root_run_id = run_id previous_run_info.parent_run_id = run_id - # Load previous node run info and remove aggregation nodes in case it is loaded into node run info + # Load previous node run info previous_node_run_infos = resume_from_run_storage.load_node_run_info_for_line(i) + + # In storage, aggregation nodes are persisted with filenames similar to regular nodes. + # Currently we read regular node run records by filename in the node artifacts folder, + # which may lead to load records of aggregation nodes at the same time, which is not intended. + # E.g, aggregation-node/000000000.jsonl will be treated as the node_run_info of the first line: + # node_artifacts/ + # ├─ non-aggregation-node/ + # │ ├─ 000000000.jsonl + # │ ├─ 000000001.jsonl + # │ ├─ 000000002.jsonl + # ├─ aggregation-node/ + # │ ├─ 000000000.jsonl + # So we filter out aggregation nodes since line records should not contain any info about them. previous_node_run_infos = [ run_info for run_info in previous_node_run_infos if run_info.node not in aggregation_nodes ]