Skip to content

Commit

Permalink
Merge branch 'main' into document-id-update
Browse files (browse the repository at this point in the history)
  • Loading branch information
julian-risch authored Jan 9, 2025
2 parents 4b75c28 + fe9b1e2 commit 8da30e6
Show file tree
Hide file tree
Showing 18 changed files with 44 additions and 54 deletions.
2 changes: 1 addition & 1 deletion haystack/components/audio/whisper_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __init__(
whisper_import.check()
if model not in get_args(WhisperLocalModel):
raise ValueError(
f"Model name '{model}' not recognized. Choose one among: " f"{', '.join(get_args(WhisperLocalModel))}."
f"Model name '{model}' not recognized. Choose one among: {', '.join(get_args(WhisperLocalModel))}."
)
self.model = model
self.whisper_params = whisper_params or {}
Expand Down
3 changes: 1 addition & 2 deletions haystack/components/converters/openapi_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,7 @@ def _parse_openapi_spec(self, content: str) -> Dict[str, Any]:
open_api_spec_content = yaml.safe_load(content)
except yaml.YAMLError:
error_message = (
"Failed to parse the OpenAPI specification. "
"The content does not appear to be valid JSON or YAML.\n\n"
"Failed to parse the OpenAPI specification. The content does not appear to be valid JSON or YAML.\n\n"
)
raise RuntimeError(error_message, content)

Expand Down
2 changes: 1 addition & 1 deletion haystack/components/generators/chat/hugging_face_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def __init__( # pylint: disable=too-many-positional-arguments

if task not in PIPELINE_SUPPORTED_TASKS:
raise ValueError(
f"Task '{task}' is not supported. " f"The supported tasks are: {', '.join(PIPELINE_SUPPORTED_TASKS)}."
f"Task '{task}' is not supported. The supported tasks are: {', '.join(PIPELINE_SUPPORTED_TASKS)}."
)
huggingface_pipeline_kwargs["task"] = task

Expand Down
4 changes: 2 additions & 2 deletions haystack/components/rankers/lost_in_the_middle.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __init__(self, word_count_threshold: Optional[int] = None, top_k: Optional[i
"""
if isinstance(word_count_threshold, int) and word_count_threshold <= 0:
raise ValueError(
f"Invalid value for word_count_threshold: {word_count_threshold}. " f"word_count_threshold must be > 0."
f"Invalid value for word_count_threshold: {word_count_threshold}. word_count_threshold must be > 0."
)
if isinstance(top_k, int) and top_k <= 0:
raise ValueError(f"top_k must be > 0, but got {top_k}")
Expand All @@ -78,7 +78,7 @@ def run(
"""
if isinstance(word_count_threshold, int) and word_count_threshold <= 0:
raise ValueError(
f"Invalid value for word_count_threshold: {word_count_threshold}. " f"word_count_threshold must be > 0."
f"Invalid value for word_count_threshold: {word_count_threshold}. word_count_threshold must be > 0."
)
if isinstance(top_k, int) and top_k <= 0:
raise ValueError(f"top_k must be > 0, but got {top_k}")
Expand Down
10 changes: 5 additions & 5 deletions haystack/core/component/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ def __call__(cls, *args, **kwargs):
try:
pre_init_hook.in_progress = True
named_positional_args = ComponentMeta._positional_to_kwargs(cls, args)
assert (
set(named_positional_args.keys()).intersection(kwargs.keys()) == set()
), "positional and keyword arguments overlap"
assert set(named_positional_args.keys()).intersection(kwargs.keys()) == set(), (
"positional and keyword arguments overlap"
)
kwargs.update(named_positional_args)
pre_init_hook.callback(cls, kwargs)
instance = super().__call__(**kwargs)
Expand Down Expand Up @@ -309,8 +309,8 @@ def _component_repr(component: Component) -> str:
# We're explicitly ignoring the type here because we're sure that the component
# has the __haystack_input__ and __haystack_output__ attributes at this point
return (
f'{result}\n{getattr(component, "__haystack_input__", "<invalid_input_sockets>")}'
f'\n{getattr(component, "__haystack_output__", "<invalid_output_sockets>")}'
f"{result}\n{getattr(component, '__haystack_input__', '<invalid_input_sockets>')}"
f"\n{getattr(component, '__haystack_output__', '<invalid_output_sockets>')}"
)


Expand Down
6 changes: 3 additions & 3 deletions haystack/core/pipeline/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _to_mermaid_text(graph: networkx.MultiDiGraph) -> str:
}

states = {
comp: f"{comp}[\"<b>{comp}</b><br><small><i>{type(data['instance']).__name__}{optional_inputs[comp]}</i></small>\"]:::component" # noqa
comp: f'{comp}["<b>{comp}</b><br><small><i>{type(data["instance"]).__name__}{optional_inputs[comp]}</i></small>"]:::component' # noqa
for comp, data in graph.nodes(data=True)
if comp not in ["input", "output"]
}
Expand All @@ -139,11 +139,11 @@ def _to_mermaid_text(graph: networkx.MultiDiGraph) -> str:
connections_list.append(conn_string)

input_connections = [
f"i{{&ast;}}--\"{conn_data['label']}<br><small><i>{conn_data['conn_type']}</i></small>\"--> {states[to_comp]}"
f'i{{&ast;}}--"{conn_data["label"]}<br><small><i>{conn_data["conn_type"]}</i></small>"--> {states[to_comp]}'
for _, to_comp, conn_data in graph.out_edges("input", data=True)
]
output_connections = [
f"{states[from_comp]}--\"{conn_data['label']}<br><small><i>{conn_data['conn_type']}</i></small>\"--> o{{&ast;}}"
f'{states[from_comp]}--"{conn_data["label"]}<br><small><i>{conn_data["conn_type"]}</i></small>"--> o{{&ast;}}'
for from_comp, _, conn_data in graph.in_edges("output", data=True)
]
connections = "\n".join(connections_list + input_connections + output_connections)
Expand Down
9 changes: 3 additions & 6 deletions haystack/document_stores/in_memory/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc
if filters:
if "operator" not in filters and "conditions" not in filters:
raise ValueError(
"Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering "
"for details."
"Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
)
return [doc for doc in self.storage.values() if document_matches_filter(filters=filters, document=doc)]
return list(self.storage.values())
Expand Down Expand Up @@ -506,8 +505,7 @@ def bm25_retrieval(
if filters:
if "operator" not in filters:
raise ValueError(
"Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering "
"for details."
"Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
)
filters = {"operator": "AND", "conditions": [content_type_filter, filters]}
else:
Expand Down Expand Up @@ -574,8 +572,7 @@ def embedding_retrieval( # pylint: disable=too-many-positional-arguments
return []
elif len(documents_with_embeddings) < len(all_documents):
logger.info(
"Skipping some Documents that don't have an embedding. "
"To generate embeddings, use a DocumentEmbedder."
"Skipping some Documents that don't have an embedding. To generate embeddings, use a DocumentEmbedder."
)

scores = self._compute_query_embedding_similarity_scores(
Expand Down
3 changes: 1 addition & 2 deletions haystack/marshal/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ def marshal(self, dict_: Dict[str, Any]) -> str:
return yaml.dump(dict_, Dumper=YamlDumper)
except yaml.representer.RepresenterError as e:
raise TypeError(
"Error dumping pipeline to YAML - Ensure that all pipeline "
"components only serialize basic Python types"
"Error dumping pipeline to YAML - Ensure that all pipeline components only serialize basic Python types"
) from e

def unmarshal(self, data_: Union[str, bytes, bytearray]) -> Dict[str, Any]:
Expand Down
3 changes: 1 addition & 2 deletions haystack/utils/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ def _less_than_equal(document_value: Any, filter_value: Any) -> bool:
def _in(document_value: Any, filter_value: Any) -> bool:
if not isinstance(filter_value, list):
msg = (
f"Filter value must be a `list` when using operator 'in' or 'not in', "
f"received type '{type(filter_value)}'"
f"Filter value must be a `list` when using operator 'in' or 'not in', received type '{type(filter_value)}'"
)
raise FilterError(msg)
return any(_equal(e, document_value) for e in filter_value)
Expand Down
2 changes: 1 addition & 1 deletion haystack/utils/hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def resolve_hf_pipeline_kwargs( # pylint: disable=too-many-positional-arguments
task = model_info(huggingface_pipeline_kwargs["model"], token=huggingface_pipeline_kwargs["token"]).pipeline_tag

if task not in supported_tasks:
raise ValueError(f"Task '{task}' is not supported. " f"The supported tasks are: {', '.join(supported_tasks)}.")
raise ValueError(f"Task '{task}' is not supported. The supported tasks are: {', '.join(supported_tasks)}.")
huggingface_pipeline_kwargs["task"] = task
return huggingface_pipeline_kwargs

Expand Down
12 changes: 6 additions & 6 deletions test/components/audio/test_whisper_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,14 @@ def test_whisper_local_transcriber(self, test_files_path):
docs = output["documents"]
assert len(docs) == 3

assert all(
word in docs[0].content.strip().lower() for word in {"content", "the", "document"}
), f"Expected words not found in: {docs[0].content.strip().lower()}"
assert all(word in docs[0].content.strip().lower() for word in {"content", "the", "document"}), (
f"Expected words not found in: {docs[0].content.strip().lower()}"
)
assert test_files_path / "audio" / "this is the content of the document.wav" == docs[0].meta["audio_file"]

assert all(
word in docs[1].content.strip().lower() for word in {"context", "answer"}
), f"Expected words not found in: {docs[1].content.strip().lower()}"
assert all(word in docs[1].content.strip().lower() for word in {"context", "answer"}), (
f"Expected words not found in: {docs[1].content.strip().lower()}"
)
path = test_files_path / "audio" / "the context for this answer is here.wav"
assert path.absolute() == docs[1].meta["audio_file"]

Expand Down
6 changes: 3 additions & 3 deletions test/components/converters/test_docx_file_to_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ def test_run_with_table(self, test_files_path):
table_index = next(i for i, part in enumerate(content_parts) if "| This | Is | Just a |" in part)
# check that natural order of the document is preserved
assert any("Donald Trump" in part for part in content_parts[:table_index]), "Text before table not found"
assert any(
"Now we are in Page 2" in part for part in content_parts[table_index + 1 :]
), "Text after table not found"
assert any("Now we are in Page 2" in part for part in content_parts[table_index + 1 :]), (
"Text after table not found"
)

def test_run_with_store_full_path_false(self, test_files_path):
"""
Expand Down
6 changes: 3 additions & 3 deletions test/components/embedders/test_openai_document_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ def test_run(self):
assert len(doc.embedding) == 1536
assert all(isinstance(x, float) for x in doc.embedding)

assert (
"text" in result["meta"]["model"] and "ada" in result["meta"]["model"]
), "The model name does not contain 'text' and 'ada'"
assert "text" in result["meta"]["model"] and "ada" in result["meta"]["model"], (
"The model name does not contain 'text' and 'ada'"
)

assert result["meta"]["usage"] == {"prompt_tokens": 15, "total_tokens": 15}, "Usage information does not match"
6 changes: 3 additions & 3 deletions test/components/embedders/test_openai_text_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ def test_run(self):
assert len(result["embedding"]) == 1536
assert all(isinstance(x, float) for x in result["embedding"])

assert (
"text" in result["meta"]["model"] and "ada" in result["meta"]["model"]
), "The model name does not contain 'text' and 'ada'"
assert "text" in result["meta"]["model"] and "ada" in result["meta"]["model"], (
"The model name does not contain 'text' and 'ada'"
)

assert result["meta"]["usage"] == {"prompt_tokens": 6, "total_tokens": 6}, "Usage information does not match"
6 changes: 3 additions & 3 deletions test/components/joiners/test_document_joiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,6 @@ def test_test_score_norm_with_rrf(self):
for i in range(len(join_results["documents"]) - 1)
)

assert (
is_sorted
), "Documents are not sorted in descending order by score, there is an issue with rff ranking"
assert is_sorted, (
"Documents are not sorted in descending order by score, there is an issue with rff ranking"
)
8 changes: 2 additions & 6 deletions test/components/preprocessors/test_document_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_remove_whitespaces(self):
)
assert len(result["documents"]) == 1
assert result["documents"][0].content == (
"This is a text with some words. " "" "There is a second sentence. " "" "And there is a third sentence.\f"
"This is a text with some words. There is a second sentence. And there is a third sentence.\f"
)

def test_remove_substrings(self):
Expand Down Expand Up @@ -210,11 +210,7 @@ def test_ascii_only(self):
def test_other_document_fields_are_not_lost(self):
cleaner = DocumentCleaner(keep_id=True)
document = Document(
content="This is a text with some words. \n"
""
"There is a second sentence. \n"
""
"And there is a third sentence.\n",
content="This is a text with some words. \nThere is a second sentence. \nAnd there is a third sentence.\n",
dataframe=DataFrame({"col1": [1], "col2": [2]}),
blob=ByteStream.from_string("some_data"),
meta={"data": 1},
Expand Down
6 changes: 3 additions & 3 deletions test/components/routers/test_conditional_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,9 +436,9 @@ def test_router_with_optional_parameters(self):

# Test pipeline without path parameter
result = pipe.run(data={"router": {"question": "What?"}})
assert result["router"] == {
"fallback": "What?"
}, "Default route should work in pipeline when 'path' is not provided"
assert result["router"] == {"fallback": "What?"}, (
"Default route should work in pipeline when 'path' is not provided"
)

# Test pipeline with path parameter
result = pipe.run(data={"router": {"question": "What?", "path": "followup_short"}})
Expand Down
4 changes: 2 additions & 2 deletions test/core/pipeline/features/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ def pipeline_that_has_a_component_with_only_default_inputs():
"answers": [
GeneratedAnswer(
data="Paris",
query="What " "is " "the " "capital " "of " "France?",
query="What is the capital of France?",
documents=[
Document(
id="413dccdf51a54cca75b7ed2eddac04e6e58560bd2f0caf4106a3efc023fe3651",
Expand Down Expand Up @@ -916,7 +916,7 @@ def fake_generator_run(self, generation_kwargs: Optional[Dict[str, Any]] = None,
pipe,
[
PipelineRunData(
inputs={"prompt_builder": {"query": "What is the capital of " "Italy?"}},
inputs={"prompt_builder": {"query": "What is the capital of Italy?"}},
expected_outputs={"router": {"correct_replies": ["Rome"]}},
expected_run_order=["prompt_builder", "generator", "router", "prompt_builder", "generator", "router"],
)
Expand Down

0 comments on commit 8da30e6

Please sign in to comment.