From 19829da01ba9d05c13aad45dc4756327803325ac Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Thu, 25 May 2023 10:30:41 +0200 Subject: [PATCH] refactor: Generate eval result in separate method (#5001) Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> --- haystack/pipelines/base.py | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index dca9a55d57..91aac7ac8d 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1366,15 +1366,45 @@ def eval_batch( event_properties={"pipeline.classname": self.__class__.__name__, "pipeline.config_hash": self.config_hash}, ) + predictions_batches = self.run_batch( + queries=[label.query for label in labels], labels=labels, documents=documents, params=params, debug=True + ) + + eval_result = self._generate_eval_result_from_batch_preds( + predictions_batches=predictions_batches, + params=params, + sas_model_name_or_path=sas_model_name_or_path, + sas_batch_size=sas_batch_size, + sas_use_gpu=sas_use_gpu, + add_isolated_node_eval=add_isolated_node_eval, + custom_document_id_field=custom_document_id_field, + context_matching_min_length=context_matching_min_length, + context_matching_boost_split_overlaps=context_matching_boost_split_overlaps, + context_matching_threshold=context_matching_threshold, + use_auth_token=use_auth_token, + ) + + return eval_result + + def _generate_eval_result_from_batch_preds( + self, + predictions_batches: Dict, + params: Optional[dict] = None, + sas_model_name_or_path: Optional[str] = None, + sas_batch_size: int = 32, + sas_use_gpu: bool = True, + add_isolated_node_eval: bool = False, + custom_document_id_field: Optional[str] = None, + context_matching_min_length: int = 100, + context_matching_boost_split_overlaps: bool = True, + context_matching_threshold: float = 65.0, + use_auth_token: Optional[Union[str, bool]] = None, + ) -> EvaluationResult: eval_result = EvaluationResult() if add_isolated_node_eval: params = {} if params is None else params.copy() params["add_isolated_node_eval"] = True - predictions_batches = self.run_batch( - queries=[label.query for label in labels], labels=labels, documents=documents, params=params, debug=True - ) - for node_name in predictions_batches["_debug"].keys(): node_output = predictions_batches["_debug"][node_name]["output"] df = self._build_eval_dataframe(