Skip to content

Commit

Permalink
Fix _debug info getting lost for previous nodes when using join nodes (#2776)
Browse files Browse the repository at this point in the history

* fix debug output for pipelines with join nodes

* add test

* Update Documentation & Code Style

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
tstadel and github-actions[bot] authored Jul 7, 2022
1 parent a766b70 commit 45136ba
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
6 changes: 5 additions & 1 deletion haystack/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,11 @@ def run( # type: ignore
if queue.get(n): # concatenate inputs if it's a join node
existing_input = queue[n]
if "inputs" not in existing_input.keys():
updated_input: dict = {"inputs": [existing_input, node_output], "params": params}
updated_input: dict = {
"inputs": [existing_input, node_output],
"params": params,
"_debug": {**existing_input["_debug"], **node_output["_debug"]},
}
if query:
updated_input["query"] = query
if file_paths:
Expand Down
36 changes: 35 additions & 1 deletion test/pipelines/test_pipeline_debug_and_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

from haystack.pipelines import Pipeline, RootNode
from haystack.nodes import FARMReader, BM25Retriever
from haystack.nodes import FARMReader, BM25Retriever, JoinDocuments

from ..conftest import SAMPLES_PATH, MockRetriever as BaseMockRetriever, MockReader

Expand Down Expand Up @@ -108,6 +108,40 @@ def test_debug_attributes_per_node(document_store_with_docs, tmp_path):
json.dumps(prediction, default=str)


@pytest.mark.elasticsearch
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_debug_attributes_for_join_nodes(document_store_with_docs, tmp_path):
    """Check that `_debug` keeps the entries of the nodes feeding a join node.

    Two BM25 retrievers are run on the same query and merged by a
    `JoinDocuments` node. With `debug=True`, the prediction must expose a
    non-empty `input` and `output` debug entry for both retrievers and for
    the join node itself.
    """
    # Build both retrievers up front, keyed by the node name they get in the pipeline.
    retrievers = {
        node_name: BM25Retriever(document_store=document_store_with_docs)
        for node_name in ("ESRetriever1", "ESRetriever2")
    }

    pipeline = Pipeline()
    for node_name, retriever in retrievers.items():
        pipeline.add_node(component=retriever, name=node_name, inputs=["Query"])
    pipeline.add_node(component=JoinDocuments(), name="JoinDocuments", inputs=list(retrievers))

    prediction = pipeline.run(query="Who lives in Berlin?", debug=True)
    assert "_debug" in prediction
    debug_info = prediction["_debug"]

    expected_nodes = [*retrievers, "JoinDocuments"]
    debug_sections = ("input", "output")

    # Every node of the pipeline must be listed ...
    for node_name in expected_nodes:
        assert node_name in debug_info

    # ... each with both debug sections present ...
    for node_name in expected_nodes:
        for section in debug_sections:
            assert section in debug_info[node_name]

    # ... and none of those sections may be empty.
    for node_name in expected_nodes:
        for section in debug_sections:
            assert debug_info[node_name][section]

    # Avoid circular reference: easiest way to detect those is to use json.dumps
    json.dumps(prediction, default=str)


@pytest.mark.elasticsearch
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_global_debug_attributes_override_node_ones(document_store_with_docs, tmp_path):
Expand Down

0 comments on commit 45136ba

Please sign in to comment.