Skip to content

Commit

Permalink
pass documents via labels to eval
Browse files Browse the repository at this point in the history
  • Loading branch information
julian-risch committed Feb 7, 2022
1 parent bf240c7 commit dcc51e4
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 10 deletions.
13 changes: 5 additions & 8 deletions haystack/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,17 +439,16 @@ def run( # type: ignore
def eval(
self,
labels: List[MultiLabel],
documents: Optional[List[List[Document]]] = None,
params: Optional[dict] = None,
sas_model_name_or_path: str = None,
add_isolated_node_eval: bool = False,
pass_documents_as_input: bool = False
) -> EvaluationResult:
"""
Evaluates the pipeline by running the pipeline once per query in debug mode
and putting together all data that is needed for evaluation, e.g. calculating metrics.
:param labels: The labels to evaluate on
:param documents: List of List of Document that the first node in the pipeline should get as input per multilabel. Can be used to evaluate a pipeline that consists of a reader without a retriever.
:param params: Dictionary of parameters to be dispatched to the nodes.
If you want to pass a param to all nodes, you can just use: {"top_k":10}
If you want to pass it to targeted nodes, you can do:
Expand All @@ -473,25 +472,23 @@ def eval(
To this end, labels are used as input to the node instead of the output of the previous node in the pipeline.
The generated dataframes in the EvaluationResult then contain additional rows, which can be distinguished from the integrated evaluation results based on the
values "integrated" or "isolated" in the column "eval_mode" and the evaluation report then additionally lists the upper bound of each node's evaluation metrics.
:param pass_documents_as_input: If set to True, the documents specified in the labels are passed as input to the first node in the pipeline. Can be used to evaluate a pipeline that consists of a reader without a retriever.
"""
eval_result = EvaluationResult()
if add_isolated_node_eval:
if params is None:
params = {}
params["add_isolated_node_eval"] = True
for i,label in enumerate(labels):
for label in labels:
params_per_label = copy.deepcopy(params)
if label.filters is not None:
if params_per_label is None:
params_per_label = {"filters": label.filters}
else:
# join both filters and overwrite filters in params with filters in labels
params_per_label["filters"] = {**params_per_label.get("filters", {}), **label.filters}
if documents is not None:
# documents are passed as input to the first node in the pipeline
predictions = self.run(query=label.query, labels=label, documents=documents[i], params=params_per_label, debug=True)
else:
predictions = self.run(query=label.query, labels=label, params=params_per_label, debug=True)
documents = [l.document for l in label.labels] if pass_documents_as_input else None
predictions = self.run(query=label.query, labels=label, documents=documents, params=params_per_label, debug=True)

for node_name in predictions["_debug"].keys():
node_output = predictions["_debug"][node_name]["output"]
Expand Down
4 changes: 2 additions & 2 deletions test/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,8 +524,8 @@ def test_reader_eval_in_pipeline(reader):
pipeline.add_node(component=reader, name="Reader", inputs=["Query"])
eval_result: EvaluationResult = pipeline.eval(
labels=EVAL_LABELS,
documents=[[label.document for label in multilabel.labels] for multilabel in EVAL_LABELS],
params={}
params={},
pass_documents_as_input=True
)

metrics = eval_result.calculate_metrics()
Expand Down

0 comments on commit dcc51e4

Please sign in to comment.