Skip to content

Commit

Permalink
feat: Adding StringJoiner (#8357)
Browse files Browse the repository at this point in the history
* Adding StringJoiner

* Release notes

* Remove typing

* Remove unused import

* Try to fix header

* Fix one test

* Add to docs, move test to behavioral pipeline test

* Undo changes

* Fix test

* Update haystack/components/joiners/string_joiner.py

Co-authored-by: Stefano Fiorucci <[email protected]>

* Update haystack/components/joiners/string_joiner.py

Co-authored-by: Stefano Fiorucci <[email protected]>

* Provide usage example

* Apply suggestions from code review

Co-authored-by: Stefano Fiorucci <[email protected]>

---------

Co-authored-by: Stefano Fiorucci <[email protected]>
Co-authored-by: Silvano Cerza <[email protected]>
  • Loading branch information
3 people committed Oct 30, 2024
1 parent ab2eb8e commit 2fd1d78
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 4 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pip install haystack-ai

Install from the `main` branch to try the newest features:
```sh
pip install git+https://github.com/deepset-ai/haystack.git@main
pip install git+https://github.com/deepset-ai/haystack.git@main
```

Haystack supports multiple installation methods including Docker images. For a comprehensive guide please refer
Expand Down
2 changes: 1 addition & 1 deletion docs/pydoc/config/joiners_api.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
loaders:
- type: haystack_pydoc_tools.loaders.CustomPythonLoader
search_path: [../../../haystack/components/joiners]
modules: ["document_joiner", "branch", "answer_joiner"]
modules: ["document_joiner", "branch", "answer_joiner", "string_joiner"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter
Expand Down
3 changes: 2 additions & 1 deletion haystack/components/joiners/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
from .answer_joiner import AnswerJoiner
from .branch import BranchJoiner
from .document_joiner import DocumentJoiner
from .string_joiner import StringJoiner

__all__ = ["DocumentJoiner", "BranchJoiner", "AnswerJoiner"]
__all__ = ["DocumentJoiner", "BranchJoiner", "AnswerJoiner", "StringJoiner"]
59 changes: 59 additions & 0 deletions haystack/components/joiners/string_joiner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0

from typing import List

from haystack import component, logging
from haystack.core.component.types import Variadic

logger = logging.getLogger(__name__)


@component
class StringJoiner:
    """
    Collects strings produced by different components into a single list of strings.

    ### Usage example

    ```python
    from haystack.components.joiners import StringJoiner
    from haystack.components.builders import PromptBuilder
    from haystack.core.pipeline import Pipeline

    string_1 = "What's Natural Language Processing?"
    string_2 = "What is life?"

    pipeline = Pipeline()
    pipeline.add_component("prompt_builder_1", PromptBuilder("Builder 1: {{query}}"))
    pipeline.add_component("prompt_builder_2", PromptBuilder("Builder 2: {{query}}"))
    pipeline.add_component("string_joiner", StringJoiner())

    pipeline.connect("prompt_builder_1.prompt", "string_joiner.strings")
    pipeline.connect("prompt_builder_2.prompt", "string_joiner.strings")

    print(pipeline.run(data={"prompt_builder_1": {"query": string_1}, "prompt_builder_2": {"query": string_2}}))

    >> {"string_joiner": {"strings": ["Builder 1: What's Natural Language Processing?", "Builder 2: What is life?"]}}
    ```
    """

    @component.output_types(strings=List[str])
    def run(self, strings: Variadic[str]):
        """
        Gathers the incoming strings into one list.

        :param strings:
            Strings received from the connected components.
        :returns:
            A dictionary with the following keys:
            - `strings`: List holding every received string, in iteration order.
        """
        # The variadic input is materialized into a plain list; no filtering or deduplication happens.
        return {"strings": list(strings)}
4 changes: 4 additions & 0 deletions releasenotes/notes/add-string-joiner-a7754e6bff9332ea.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
features:
- |
Added the StringJoiner component, which joins strings from different components into a list of strings.
37 changes: 37 additions & 0 deletions test/components/joiners/test_string_joiner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0

from haystack.core.serialization import component_from_dict, component_to_dict
from haystack.components.joiners.string_joiner import StringJoiner


class TestStringJoiner:
    """Unit tests for `StringJoiner`: construction, (de)serialization, and `run` outputs."""

    def test_init(self):
        """A freshly constructed component is a `StringJoiner` instance."""
        assert isinstance(StringJoiner(), StringJoiner)

    def test_to_dict(self):
        """Serialization yields the fully qualified type and no init parameters."""
        serialized = component_to_dict(StringJoiner(), name="string_joiner")
        assert serialized == {"type": "haystack.components.joiners.string_joiner.StringJoiner", "init_parameters": {}}

    def test_from_dict(self):
        """Deserializing the serialized form gives back a `StringJoiner`."""
        payload = {"type": "haystack.components.joiners.string_joiner.StringJoiner", "init_parameters": {}}
        restored = component_from_dict(StringJoiner, data=payload, name="string_joiner")
        assert isinstance(restored, StringJoiner)

    def test_empty_list(self):
        """An empty input produces an empty output list."""
        assert StringJoiner().run([]) == {"strings": []}

    def test_single_string(self):
        """A bare one-character string is passed directly (not wrapped in a list) and comes back as a single item."""
        assert StringJoiner().run("a") == {"strings": ["a"]}

    def test_two_strings(self):
        """Two input strings are returned in the same order."""
        assert StringJoiner().run(["a", "b"]) == {"strings": ["a", "b"]}
1 change: 1 addition & 0 deletions test/core/pipeline/features/pipeline_run.feature
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ Feature: Pipeline running
| that is linear and a component in the middle receives optional input from other components and input from the user |
| that has a loop in the middle |
| that has variadic component that receives a conditional input |
| that has a string variadic component |

Scenario Outline: Running a bad Pipeline
Given a pipeline <kind>
Expand Down
31 changes: 30 additions & 1 deletion test/core/pipeline/features/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.joiners import BranchJoiner, DocumentJoiner, AnswerJoiner
from haystack.components.joiners import BranchJoiner, DocumentJoiner, AnswerJoiner, StringJoiner
from haystack.testing.sample_components import (
Accumulate,
AddFixedValue,
Expand Down Expand Up @@ -2195,3 +2195,32 @@ def run(self, documents: List[Document]):
],
),
]


@given("a pipeline that has a string variadic component", target_fixture="pipeline_data")
def that_has_a_string_variadic_component():
    """
    Builds a pipeline in which two PromptBuilders feed the `strings` input of one StringJoiner.

    :returns:
        A tuple of the pipeline and a one-element list of `PipelineRunData` holding the inputs,
        the expected joined output and the expected component run order.
    """
    first_query = "What's Natural Language Processing?"
    second_query = "What's is life?"

    pipe = Pipeline()
    pipe.add_component("prompt_builder_1", PromptBuilder("Builder 1: {{query}}"))
    pipe.add_component("prompt_builder_2", PromptBuilder("Builder 2: {{query}}"))
    pipe.add_component("string_joiner", StringJoiner())

    # Both builders connect to the same input socket of the joiner.
    pipe.connect("prompt_builder_1.prompt", "string_joiner.strings")
    pipe.connect("prompt_builder_2.prompt", "string_joiner.strings")

    run_data = PipelineRunData(
        inputs={"prompt_builder_1": {"query": first_query}, "prompt_builder_2": {"query": second_query}},
        expected_outputs={
            "string_joiner": {
                "strings": ["Builder 1: What's Natural Language Processing?", "Builder 2: What's is life?"]
            }
        },
        expected_run_order=["prompt_builder_1", "prompt_builder_2", "string_joiner"],
    )
    return pipe, [run_data]

0 comments on commit 2fd1d78

Please sign in to comment.