From 69e4db52a5d161e84190a9821d3dc0ccd097182a Mon Sep 17 00:00:00 2001 From: Malte Pietsch Date: Fri, 12 Aug 2022 17:36:42 +0200 Subject: [PATCH 1/4] enable isolated node eval for answer generator nodes --- haystack/nodes/answer_generator/base.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/haystack/nodes/answer_generator/base.py b/haystack/nodes/answer_generator/base.py index 115b3ae43a..32a84c9319 100644 --- a/haystack/nodes/answer_generator/base.py +++ b/haystack/nodes/answer_generator/base.py @@ -4,7 +4,7 @@ from tqdm.auto import tqdm from haystack.errors import HaystackError -from haystack.schema import Answer, Document +from haystack.schema import Answer, Document, MultiLabel, Label from haystack.nodes.base import BaseComponent @@ -31,13 +31,20 @@ def predict(self, query: str, documents: List[Document], top_k: Optional[int]) - """ pass - def run(self, query: str, documents: List[Document], top_k: Optional[int] = None): # type: ignore + def run(self, query: str, documents: List[Document], top_k: Optional[int] = None, labels: Optional[MultiLabel] = None, add_isolated_node_eval: bool = False): # type: ignore if documents: results = self.predict(query=query, documents=documents, top_k=top_k) else: results = {"answers": []} + + # run evaluation with labels as node inputs + if add_isolated_node_eval and labels is not None: + relevant_documents = {label.document.id: label.document for label in labels.labels}.values() + results_label_input = self.predict(query=query, documents=relevant_documents, top_k=top_k) + results["answers_isolated"] = results_label_input["answers"] + return results, "output_1" def run_batch( # type: ignore From f7c927fd38ad97413598bb7acaa5d592612dc38d Mon Sep 17 00:00:00 2001 From: Malte Pietsch Date: Fri, 12 Aug 2022 18:04:29 +0200 Subject: [PATCH 2/4] adjust comment --- haystack/nodes/answer_generator/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/haystack/nodes/answer_generator/base.py b/haystack/nodes/answer_generator/base.py index 32a84c9319..102a6d0496 100644 --- a/haystack/nodes/answer_generator/base.py +++ b/haystack/nodes/answer_generator/base.py @@ -38,8 +38,7 @@ def run(self, query: str, documents: List[Document], top_k: Optional[int] = None else: results = {"answers": []} - - # run evaluation with labels as node inputs + # run evaluation with "perfect" labels as node inputs to calculate "upper bound" metrics for just this node if add_isolated_node_eval and labels is not None: relevant_documents = {label.document.id: label.document for label in labels.labels}.values() results_label_input = self.predict(query=query, documents=relevant_documents, top_k=top_k) From a5a64bfb2b0919bff1a19da88a63fd819e21c095 Mon Sep 17 00:00:00 2001 From: Malte Pietsch Date: Fri, 12 Aug 2022 18:06:02 +0200 Subject: [PATCH 3/4] remove unused import --- haystack/nodes/answer_generator/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/nodes/answer_generator/base.py b/haystack/nodes/answer_generator/base.py index 102a6d0496..17b00ff30a 100644 --- a/haystack/nodes/answer_generator/base.py +++ b/haystack/nodes/answer_generator/base.py @@ -4,7 +4,7 @@ from tqdm.auto import tqdm from haystack.errors import HaystackError -from haystack.schema import Answer, Document, MultiLabel, Label +from haystack.schema import Answer, Document, MultiLabel from haystack.nodes.base import BaseComponent From d5793fc7b47c02d2c7bb76bc9ec9b0a1f7cad833 Mon Sep 17 00:00:00 2001 From: tstadel <60758086+tstadel@users.noreply.github.com> Date: Sun, 14 Aug 2022 10:48:07 +0200 Subject: [PATCH 4/4] fix mypy --- haystack/nodes/answer_generator/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/nodes/answer_generator/base.py b/haystack/nodes/answer_generator/base.py index 17b00ff30a..066b55050c 100644 --- a/haystack/nodes/answer_generator/base.py +++ b/haystack/nodes/answer_generator/base.py @@ -40,7 +40,7 @@ def run(self, query: str, documents: List[Document], top_k: Optional[int] = None # run evaluation with "perfect" labels as node inputs to calculate "upper bound" metrics for just this node if add_isolated_node_eval and labels is not None: - relevant_documents = {label.document.id: label.document for label in labels.labels}.values() + relevant_documents = list({label.document.id: label.document for label in labels.labels}.values()) results_label_input = self.predict(query=query, documents=relevant_documents, top_k=top_k) results["answers_isolated"] = results_label_input["answers"]