Merge branch 'main' into docstrings-linting

deepset-ai · Apr 23, 2024 · be983ac · be983ac
2 parents f7ae004 + 201db5b
commit be983ac
Show file tree

Hide file tree

Showing 55 changed files with 263 additions and 105 deletions.
diff --git a/haystack/components/builders/answer_builder.py b/haystack/components/builders/answer_builder.py
@@ -10,6 +10,7 @@
 class AnswerBuilder:
     """
     Takes a query and the replies a Generator returns as input and parses them into GeneratedAnswer objects.
+
     Optionally, it also takes Documents and metadata from the Generator as inputs to enrich the GeneratedAnswer objects.
 
     Usage example:
@@ -126,9 +127,10 @@ def run(
     def _extract_answer_string(reply: str, pattern: Optional[str] = None) -> str:
         """
         Extract the answer string from the generator output using the specified pattern.
+
         If no pattern is specified, the whole string is used as the answer.
 
-        :param replies:
+        :param reply:
             The output of the Generator. A string.
         :param pattern:
             The regular expression pattern to use to extract the answer text from the generator output.

diff --git a/haystack/components/builders/dynamic_chat_prompt_builder.py b/haystack/components/builders/dynamic_chat_prompt_builder.py
@@ -11,10 +11,12 @@
 @component
 class DynamicChatPromptBuilder:
     """
-    DynamicChatPromptBuilder is designed to construct dynamic prompts from a list of `ChatMessage` instances. It
-    integrates with Jinja2 templating for dynamic prompt generation. It considers any user or system message in the list
-    potentially containing a template and renders it with variables provided to the constructor. Additional template
-    variables can be feed into the component/pipeline `run` method and will be merged before rendering the template.
+    DynamicChatPromptBuilder is designed to construct dynamic prompts from a list of `ChatMessage` instances.
+
+    It integrates with Jinja2 templating for dynamic prompt generation. It considers any user or system message in the
+    list potentially containing a template and renders it with variables provided to the constructor. Additional
+    template variables can be fed into the component/pipeline `run` method and will be merged before rendering the
+    template.
 
     Usage example:
     ```python
@@ -92,6 +94,7 @@ def __init__(self, runtime_variables: Optional[List[str]] = None):
     def run(self, prompt_source: List[ChatMessage], template_variables: Optional[Dict[str, Any]] = None, **kwargs):
         """
         Executes the dynamic prompt building process by processing a list of `ChatMessage` instances.
+
         Any user message or system message is inspected for templates and rendered with the variables provided to the
         constructor. You can provide additional template variables directly to this method, which are then merged with
         the variables provided to the constructor.
@@ -151,6 +154,7 @@ def run(self, prompt_source: List[ChatMessage], template_variables: Optional[Dic
     def _validate_template(self, template_text: str, provided_variables: Set[str]):
         """
         Checks if all the required template variables are provided to the pipeline `run` method.
+
         If all the required template variables are provided, returns a Jinja2 template object.
         Otherwise, raises a ValueError.
 

diff --git a/haystack/components/builders/dynamic_prompt_builder.py b/haystack/components/builders/dynamic_prompt_builder.py
@@ -10,8 +10,10 @@
 @component
 class DynamicPromptBuilder:
     """
-    DynamicPromptBuilder is designed to construct dynamic prompts for the pipeline. Users can change the prompt
-    template at runtime by providing a new template for each pipeline run invocation if needed.
+    DynamicPromptBuilder is designed to construct dynamic prompts for the pipeline.
+
+    Users can change the prompt template at runtime by providing a new template for each pipeline run invocation
+    if needed.
 
     Usage example:
     ```python
@@ -92,12 +94,15 @@ def __init__(self, runtime_variables: Optional[List[str]] = None):
 
     def run(self, prompt_source: str, template_variables: Optional[Dict[str, Any]] = None, **kwargs):
         """
-        Executes the dynamic prompt building process. Depending on the provided type of `prompt_source`, this method
-        either processes a list of `ChatMessage` instances or a string template. In the case of `ChatMessage` instances,
-        the last user message is treated as a template and rendered with the resolved pipeline variables and any
-        additional template variables provided. For a string template, it directly applies the template variables to
-        render the final prompt. You can provide additional template variables directly to this method, that are then
-        merged with the variables resolved from the pipeline runtime.
+        Executes the dynamic prompt building process.
+
+        Depending on the provided type of `prompt_source`, this method either processes a list of `ChatMessage`
+        instances or a string template. In the case of `ChatMessage` instances, the last user message is treated as a
+        template and rendered with the resolved pipeline variables and any additional template variables provided.
+
+        For a string template, it directly applies the template variables to render the final prompt. You can provide
+        additional template variables directly to this method, which are then merged with the variables resolved from
+        the pipeline runtime.
 
         :param prompt_source:
             A string template.
@@ -127,6 +132,7 @@ def run(self, prompt_source: str, template_variables: Optional[Dict[str, Any]] =
     def _validate_template(self, template_text: str, provided_variables: Set[str]):
         """
         Checks if all the required template variables are provided to the pipeline `run` method.
+
         If all the required template variables are provided, returns a Jinja2 template object.
         Otherwise, raises a ValueError.
 

diff --git a/haystack/components/builders/prompt_builder.py b/haystack/components/builders/prompt_builder.py
@@ -40,6 +40,8 @@ def to_dict(self) -> Dict[str, Any]:
     @component.output_types(prompt=str)
     def run(self, **kwargs):
         """
+        Renders the prompt template with the provided variables.
+
         :param kwargs:
             The variables that will be used to render the prompt template.
 

diff --git a/haystack/components/caching/cache_checker.py b/haystack/components/caching/cache_checker.py
@@ -10,8 +10,7 @@
 @component
 class CacheChecker:
     """
-    Checks for the presence of documents in a Document Store based on a specified
-    field in each document's metadata.
+    Checks for the presence of documents in a Document Store based on a specified field in each document's metadata.
 
     If matching documents are found, they are returned as hits. If not, the items
     are returned as misses, indicating they are not in the cache.
@@ -92,8 +91,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "CacheChecker":
     @component.output_types(hits=List[Document], misses=List)
     def run(self, items: List[Any]):
         """
-        Checks if any document associated with the specified cache field
-        is already present in the store.
+        Checks if any document associated with the specified cache field is already present in the store.
 
         :param items:
             Values to be checked against the cache field.

diff --git a/haystack/components/classifiers/document_language_classifier.py b/haystack/components/classifiers/document_language_classifier.py
@@ -50,6 +50,8 @@ class DocumentLanguageClassifier:
 
     def __init__(self, languages: Optional[List[str]] = None):
         """
+        Initialize the DocumentLanguageClassifier.
+
         :param languages: A list of languages in ISO code, each corresponding to a different output connection.
             For supported languages, see the [`langdetect` documentation](https://github.com/Mimino666/langdetect#languages).
             If not specified, the default is ["en"].
@@ -63,6 +65,7 @@ def __init__(self, languages: Optional[List[str]] = None):
     def run(self, documents: List[Document]):
         """
         This method classifies the documents' language and adds it to their metadata.
+
         If a Document's text does not match any of the languages specified at initialization,
         the metadata value "unmatched" will be stored.
 

diff --git a/haystack/components/connectors/openapi_service.py b/haystack/components/connectors/openapi_service.py
@@ -16,6 +16,8 @@
 @component
 class OpenAPIServiceConnector:
     """
+    A component which connects the Haystack framework to OpenAPI services.
+
     The `OpenAPIServiceConnector` component connects the Haystack framework to OpenAPI services, enabling it to call
     operations as defined in the OpenAPI specification of the service.
 
@@ -77,8 +79,10 @@ def run(
         service_credentials: Optional[Union[dict, str]] = None,
     ) -> Dict[str, List[ChatMessage]]:
         """
-        Processes a list of chat messages to invoke a method on an OpenAPI service. It parses the last message in the
-        list, expecting it to contain an OpenAI function calling descriptor (name & parameters) in JSON format.
+        Processes a list of chat messages to invoke a method on an OpenAPI service.
+
+        It parses the last message in the list, expecting it to contain an OpenAI function calling descriptor
+        (name & parameters) in JSON format.
 
         :param messages: A list of `ChatMessage` objects containing the messages to be processed. The last message
         should contain the function invocation payload in OpenAI function calling format. See the example in the class
@@ -148,6 +152,8 @@ def _parse_message(self, message: ChatMessage) -> List[Dict[str, Any]]:
 
     def _authenticate_service(self, openapi_service: OpenAPI, credentials: Optional[Union[dict, str]] = None):
         """
+        Authentication with an OpenAPI service.
+
         Authenticates with the OpenAPI service if required, supporting both single (str) and multiple
         authentication methods (dict).
 
@@ -201,8 +207,9 @@ def _authenticate_service(self, openapi_service: OpenAPI, credentials: Optional[
 
     def _invoke_method(self, openapi_service: OpenAPI, method_invocation_descriptor: Dict[str, Any]) -> Any:
         """
-        Invokes the specified method on the OpenAPI service. The method name and arguments are passed in the
-        method_invocation_descriptor.
+        Invokes the specified method on the OpenAPI service.
+
+        The method name and arguments are passed in the method_invocation_descriptor.
 
         :param openapi_service: The OpenAPI service instance.
         :param method_invocation_descriptor: The method name and arguments to be passed to the method. The payload

diff --git a/haystack/components/converters/azure.py b/haystack/components/converters/azure.py
@@ -23,7 +23,8 @@
 @component
 class AzureOCRDocumentConverter:
     """
-    A component for converting files to Documents using Azure's Document Intelligence service.
+    Convert files to documents using Azure's Document Intelligence service.
+
     Supported file formats are: PDF, JPEG, PNG, BMP, TIFF, DOCX, XLSX, PPTX, and HTML.
 
     In order to be able to use this component, you need an active Azure account
@@ -170,6 +171,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "AzureOCRDocumentConverter":
     # pylint: disable=line-too-long
     def _convert_tables_and_text(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]]) -> List[Document]:
         """
+        Converts the tables and text extracted by Azure's Document Intelligence service into Haystack Documents.
+
         :param result: The AnalyzeResult object returned by the `begin_analyze_document` method. Docs on Analyze result
             can be found [here](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-ai-formrecognizer/3.3.0/azure.ai.formrecognizer.html?highlight=read#azure.ai.formrecognizer.AnalyzeResult).
         :param meta: Optional dictionary with metadata that shall be attached to all resulting documents.
@@ -188,6 +191,7 @@ def _convert_tables_and_text(self, result: "AnalyzeResult", meta: Optional[Dict[
     def _convert_tables(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]]) -> List[Document]:
         """
         Converts the tables extracted by Azure's Document Intelligence service into Haystack Documents.
+
         :param result: The AnalyzeResult Azure object
         :param meta: Optional dictionary with metadata that shall be attached to all resulting documents.
 
@@ -296,8 +300,10 @@ def _convert_tables(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]
 
     def _convert_to_natural_text(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]]) -> Document:
         """
-        This converts the `AnalyzeResult` object into a single Document. We add "\f" separators between to
-        differentiate between the text on separate pages. This is the expected format for the PreProcessor.
+        This converts the `AnalyzeResult` object into a single document.
+
+        We add "\f" separators between pages to differentiate the text of separate pages. This is the expected format
+        for the PreProcessor.
 
         :param result: The AnalyzeResult object returned by the `begin_analyze_document` method. Docs on Analyze result
             can be found [here](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-ai-formrecognizer/3.3.0/azure.ai.formrecognizer.html?highlight=read#azure.ai.formrecognizer.AnalyzeResult).
@@ -340,8 +346,10 @@ def _convert_to_single_column_text(
         self, result: "AnalyzeResult", meta: Optional[Dict[str, str]], threshold_y: float = 0.05
     ) -> Document:
         """
-        This converts the `AnalyzeResult` object into a single Haystack Document. We add "\f" separators between to
-        differentiate between the text on separate pages. This is the expected format for the PreProcessor.
+        This converts the `AnalyzeResult` object into a single Haystack Document.
+
+        We add "\f" separators between pages to differentiate the text of separate pages. This is the expected format
+        for the PreProcessor.
 
         :param result: The AnalyzeResult object returned by the `begin_analyze_document` method. Docs on Analyze result
             can be found [here](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-ai-formrecognizer/3.3.0/azure.ai.formrecognizer.html?highlight=read#azure.ai.formrecognizer.AnalyzeResult).
@@ -427,6 +435,7 @@ def _convert_to_single_column_text(
     def _collect_table_spans(self, result: "AnalyzeResult") -> Dict:
         """
         Collect the spans of all tables by page number.
+
         :param result: The AnalyzeResult object returned by the `begin_analyze_document` method.
         :returns: A dictionary with the page number as key and a list of table spans as value.
         """
@@ -443,6 +452,7 @@ def _check_if_in_table(
     ) -> bool:
         """
         Check if a line or paragraph is part of a table.
+
         :param tables_on_page: A dictionary with the page number as key and a list of table spans as value.
         :param line_or_paragraph: The line or paragraph to check.
         :returns: True if the line or paragraph is part of a table, False otherwise.
@@ -457,7 +467,9 @@ def _check_if_in_table(
 
     def _hash_dataframe(self, df: pd.DataFrame, desired_samples=5, hash_length=4) -> str:
         """
-        Returns a hash of the DataFrame content. The hash is based on the content of the DataFrame.
+        Returns a hash of the DataFrame content.
+
+        The hash is based on the content of the DataFrame.
         :param df: The DataFrame to hash.
         :param desired_samples: The desired number of samples to hash.
         :param hash_length: The length of the hash for each sample.

diff --git a/haystack/components/converters/openapi_functions.py b/haystack/components/converters/openapi_functions.py
@@ -112,6 +112,8 @@ def run(self, sources: List[Union[str, Path, ByteStream]]) -> Dict[str, Any]:
 
     def _openapi_to_functions(self, service_openapi_spec: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
+        OpenAPI to OpenAI function conversion.
+
         Extracts functions from the OpenAPI specification of the service and converts them into a format
         suitable for OpenAI function calling.
 
@@ -188,6 +190,8 @@ def _parse_property_attributes(
         self, property_schema: Dict[str, Any], include_attributes: Optional[List[str]] = None
     ) -> Dict[str, Any]:
         """
+        Parses the attributes of a property schema.
+
         Recursively parses the attributes of a property schema, including nested objects and arrays,
         and includes specified attributes like description, pattern, etc.
 

diff --git a/haystack/components/converters/utils.py b/haystack/components/converters/utils.py
@@ -7,6 +7,7 @@
 def get_bytestream_from_source(source: Union[str, Path, ByteStream]) -> ByteStream:
     """
     Creates a ByteStream object from a source.
+
     :param source: A source to convert to a ByteStream. Can be a string (path to a file), a Path object, or a ByteStream.
     :return: A ByteStream object.
     """
@@ -24,6 +25,8 @@ def normalize_metadata(
     meta: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]], sources_count: int
 ) -> List[Dict[str, Any]]:
     """
+    Normalize the metadata input for a converter.
+
     Given all the possible values of the meta input for a converter (None, dictionary or list of dicts),
     makes sure to return a list of dictionaries of the correct length for the converter to use.
 

diff --git a/haystack/components/embedders/hugging_face_api_document_embedder.py b/haystack/components/embedders/hugging_face_api_document_embedder.py
@@ -19,6 +19,8 @@
 @component
 class HuggingFaceAPIDocumentEmbedder:
     """
+    A component that embeds documents using Hugging Face APIs.
+
     This component can be used to compute Document embeddings using different Hugging Face APIs:
     - [Free Serverless Inference API](https://huggingface.co/inference-api)
     - [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)

diff --git a/haystack/components/embedders/hugging_face_api_text_embedder.py b/haystack/components/embedders/hugging_face_api_text_embedder.py
@@ -16,6 +16,8 @@
 @component
 class HuggingFaceAPITextEmbedder:
     """
+    A component that embeds text using Hugging Face APIs.
+
     This component can be used to embed strings using different Hugging Face APIs:
     - [Free Serverless Inference API](https://huggingface.co/inference-api)
     - [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)

diff --git a/haystack/components/evaluators/answer_exact_match.py b/haystack/components/evaluators/answer_exact_match.py
@@ -6,11 +6,13 @@
 @component
 class AnswerExactMatchEvaluator:
     """
-    Evaluator that checks if predicted answers exactly match ground truth answers.
+    An answer exact match evaluator class.
+
+    The evaluator checks if the predicted answers match any of the ground truth answers exactly.
+    The result is a number from 0.0 to 1.0; it represents the proportion of predicted answers
+    that matched one of the ground truth answers.
+    There can be multiple ground truth answers and multiple predicted answers as input.
 
-    Each predicted answer is compared to one ground truth answer.
-    The final score is a number ranging from 0.0 to 1.0.
-    It represents the proportion of predicted answers that match their corresponding ground truth answer.
 
     Usage example:
     ```python
@@ -33,7 +35,8 @@ class AnswerExactMatchEvaluator:
     def run(self, ground_truth_answers: List[str], predicted_answers: List[str]) -> Dict[str, Any]:
         """
         Run the AnswerExactMatchEvaluator on the given inputs.
-        `ground_truth_answers` and `retrieved_answers` must have the same length.
+
+        The `ground_truth_answers` and `predicted_answers` must have the same length.
 
         :param ground_truth_answers:
             A list of expected answers.

diff --git a/haystack/components/evaluators/document_map.py b/haystack/components/evaluators/document_map.py
@@ -6,6 +6,8 @@
 @component
 class DocumentMAPEvaluator:
     """
+    A Mean Average Precision (MAP) evaluator for documents.
+
     Evaluator that calculates the mean average precision of the retrieved documents, a metric
     that measures how high retrieved documents are ranked.
     Each question can have multiple ground truth documents and multiple retrieved documents.
@@ -43,6 +45,7 @@ def run(
     ) -> Dict[str, Any]:
         """
         Run the DocumentMAPEvaluator on the given inputs.
+
         All lists must have the same length.
 
         :param ground_truth_documents:
@@ -52,7 +55,7 @@ def run(
         :returns:
             A dictionary with the following outputs:
             - `score` - The average of calculated scores.
-            - `invididual_scores` - A list of numbers from 0.0 to 1.0 that represents how high retrieved documents are ranked.
+            - `individual_scores` - A list of numbers from 0.0 to 1.0 that represents how high retrieved documents are ranked.
         """
         if len(ground_truth_documents) != len(retrieved_documents):
             msg = "The length of ground_truth_documents and retrieved_documents must be the same."

diff --git a/haystack/components/evaluators/document_recall.py b/haystack/components/evaluators/document_recall.py
@@ -32,6 +32,7 @@ def from_str(string: str) -> "RecallMode":
 class DocumentRecallEvaluator:
     """
     Evaluator that calculates the Recall score for a list of documents.
+
     Returns both a list of scores for each question and the average.
     There can be multiple ground truth documents and multiple predicted documents as input.
 
@@ -91,6 +92,7 @@ def run(
     ) -> Dict[str, Any]:
         """
         Run the DocumentRecallEvaluator on the given inputs.
+
         `ground_truth_documents` and `retrieved_documents` must have the same length.
 
         :param ground_truth_documents: