Skip to content

Commit

Permalink
Merge branch 'main' into docstrings-linting
Browse files Browse the repository at this point in the history
  • Loading branch information
davidsbatista authored Apr 23, 2024
2 parents f7ae004 + 201db5b commit be983ac
Show file tree
Hide file tree
Showing 55 changed files with 263 additions and 105 deletions.
4 changes: 3 additions & 1 deletion haystack/components/builders/answer_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
class AnswerBuilder:
"""
Takes a query and the replies a Generator returns as input and parses them into GeneratedAnswer objects.
Optionally, it also takes Documents and metadata from the Generator as inputs to enrich the GeneratedAnswer objects.
Usage example:
Expand Down Expand Up @@ -126,9 +127,10 @@ def run(
def _extract_answer_string(reply: str, pattern: Optional[str] = None) -> str:
"""
Extract the answer string from the generator output using the specified pattern.
If no pattern is specified, the whole string is used as the answer.
:param replies:
:param reply:
The output of the Generator. A string.
:param pattern:
The regular expression pattern to use to extract the answer text from the generator output.
Expand Down
12 changes: 8 additions & 4 deletions haystack/components/builders/dynamic_chat_prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@
@component
class DynamicChatPromptBuilder:
"""
DynamicChatPromptBuilder is designed to construct dynamic prompts from a list of `ChatMessage` instances. It
integrates with Jinja2 templating for dynamic prompt generation. It considers any user or system message in the list
potentially containing a template and renders it with variables provided to the constructor. Additional template
variables can be feed into the component/pipeline `run` method and will be merged before rendering the template.
DynamicChatPromptBuilder is designed to construct dynamic prompts from a list of `ChatMessage` instances.
It integrates with Jinja2 templating for dynamic prompt generation. It considers any user or system message in the
list potentially containing a template and renders it with variables provided to the constructor. Additional
template variables can be fed into the component/pipeline `run` method and will be merged before rendering the
template.
Usage example:
```python
Expand Down Expand Up @@ -92,6 +94,7 @@ def __init__(self, runtime_variables: Optional[List[str]] = None):
def run(self, prompt_source: List[ChatMessage], template_variables: Optional[Dict[str, Any]] = None, **kwargs):
"""
Executes the dynamic prompt building process by processing a list of `ChatMessage` instances.
Any user message or system message is inspected for templates and rendered with the variables provided to the
constructor. You can provide additional template variables directly to this method, which are then merged with
the variables provided to the constructor.
Expand Down Expand Up @@ -151,6 +154,7 @@ def run(self, prompt_source: List[ChatMessage], template_variables: Optional[Dic
def _validate_template(self, template_text: str, provided_variables: Set[str]):
"""
Checks if all the required template variables are provided to the pipeline `run` method.
If all the required template variables are provided, returns a Jinja2 template object.
Otherwise, raises a ValueError.
Expand Down
22 changes: 14 additions & 8 deletions haystack/components/builders/dynamic_prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
@component
class DynamicPromptBuilder:
"""
DynamicPromptBuilder is designed to construct dynamic prompts for the pipeline. Users can change the prompt
template at runtime by providing a new template for each pipeline run invocation if needed.
DynamicPromptBuilder is designed to construct dynamic prompts for the pipeline.
Users can change the prompt template at runtime by providing a new template for each pipeline run invocation
if needed.
Usage example:
```python
Expand Down Expand Up @@ -92,12 +94,15 @@ def __init__(self, runtime_variables: Optional[List[str]] = None):

def run(self, prompt_source: str, template_variables: Optional[Dict[str, Any]] = None, **kwargs):
"""
Executes the dynamic prompt building process. Depending on the provided type of `prompt_source`, this method
either processes a list of `ChatMessage` instances or a string template. In the case of `ChatMessage` instances,
the last user message is treated as a template and rendered with the resolved pipeline variables and any
additional template variables provided. For a string template, it directly applies the template variables to
render the final prompt. You can provide additional template variables directly to this method, that are then
merged with the variables resolved from the pipeline runtime.
Executes the dynamic prompt building process.
Depending on the provided type of `prompt_source`, this method either processes a list of `ChatMessage`
instances or a string template. In the case of `ChatMessage` instances, the last user message is treated as a
template and rendered with the resolved pipeline variables and any additional template variables provided.
For a string template, it directly applies the template variables to render the final prompt. You can provide
additional template variables directly to this method, which are then merged with the variables resolved from
the pipeline runtime.
:param prompt_source:
A string template.
Expand Down Expand Up @@ -127,6 +132,7 @@ def run(self, prompt_source: str, template_variables: Optional[Dict[str, Any]] =
def _validate_template(self, template_text: str, provided_variables: Set[str]):
"""
Checks if all the required template variables are provided to the pipeline `run` method.
If all the required template variables are provided, returns a Jinja2 template object.
Otherwise, raises a ValueError.
Expand Down
2 changes: 2 additions & 0 deletions haystack/components/builders/prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def to_dict(self) -> Dict[str, Any]:
@component.output_types(prompt=str)
def run(self, **kwargs):
"""
Renders the prompt template with the provided variables.
:param kwargs:
The variables that will be used to render the prompt template.
Expand Down
6 changes: 2 additions & 4 deletions haystack/components/caching/cache_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
@component
class CacheChecker:
"""
Checks for the presence of documents in a Document Store based on a specified
field in each document's metadata.
Checks for the presence of documents in a Document Store based on a specified field in each document's metadata.
If matching documents are found, they are returned as hits. If not, the items
are returned as misses, indicating they are not in the cache.
Expand Down Expand Up @@ -92,8 +91,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "CacheChecker":
@component.output_types(hits=List[Document], misses=List)
def run(self, items: List[Any]):
"""
Checks if any document associated with the specified cache field
is already present in the store.
Checks if any document associated with the specified cache field is already present in the store.
:param items:
Values to be checked against the cache field.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class DocumentLanguageClassifier:

def __init__(self, languages: Optional[List[str]] = None):
"""
Initialize the DocumentLanguageClassifier.
:param languages: A list of languages in ISO code, each corresponding to a different output connection.
For supported languages, see the [`langdetect` documentation](https://github.com/Mimino666/langdetect#languages).
If not specified, the default is ["en"].
Expand All @@ -63,6 +65,7 @@ def __init__(self, languages: Optional[List[str]] = None):
def run(self, documents: List[Document]):
"""
This method classifies the documents' language and adds it to their metadata.
If a Document's text does not match any of the languages specified at initialization,
the metadata value "unmatched" will be stored.
Expand Down
15 changes: 11 additions & 4 deletions haystack/components/connectors/openapi_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
@component
class OpenAPIServiceConnector:
"""
A component which connects the Haystack framework to OpenAPI services.
The `OpenAPIServiceConnector` component connects the Haystack framework to OpenAPI services, enabling it to call
operations as defined in the OpenAPI specification of the service.
Expand Down Expand Up @@ -77,8 +79,10 @@ def run(
service_credentials: Optional[Union[dict, str]] = None,
) -> Dict[str, List[ChatMessage]]:
"""
Processes a list of chat messages to invoke a method on an OpenAPI service. It parses the last message in the
list, expecting it to contain an OpenAI function calling descriptor (name & parameters) in JSON format.
Processes a list of chat messages to invoke a method on an OpenAPI service.
It parses the last message in the list, expecting it to contain an OpenAI function calling descriptor
(name & parameters) in JSON format.
:param messages: A list of `ChatMessage` objects containing the messages to be processed. The last message
should contain the function invocation payload in OpenAI function calling format. See the example in the class
Expand Down Expand Up @@ -148,6 +152,8 @@ def _parse_message(self, message: ChatMessage) -> List[Dict[str, Any]]:

def _authenticate_service(self, openapi_service: OpenAPI, credentials: Optional[Union[dict, str]] = None):
"""
Authentication with an OpenAPI service.
Authenticates with the OpenAPI service if required, supporting both single (str) and multiple
authentication methods (dict).
Expand Down Expand Up @@ -201,8 +207,9 @@ def _authenticate_service(self, openapi_service: OpenAPI, credentials: Optional[

def _invoke_method(self, openapi_service: OpenAPI, method_invocation_descriptor: Dict[str, Any]) -> Any:
"""
Invokes the specified method on the OpenAPI service. The method name and arguments are passed in the
method_invocation_descriptor.
Invokes the specified method on the OpenAPI service.
The method name and arguments are passed in the method_invocation_descriptor.
:param openapi_service: The OpenAPI service instance.
:param method_invocation_descriptor: The method name and arguments to be passed to the method. The payload
Expand Down
24 changes: 18 additions & 6 deletions haystack/components/converters/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
@component
class AzureOCRDocumentConverter:
"""
A component for converting files to Documents using Azure's Document Intelligence service.
Convert files to documents using Azure's Document Intelligence service.
Supported file formats are: PDF, JPEG, PNG, BMP, TIFF, DOCX, XLSX, PPTX, and HTML.
In order to be able to use this component, you need an active Azure account
Expand Down Expand Up @@ -170,6 +171,8 @@ def from_dict(cls, data: Dict[str, Any]) -> "AzureOCRDocumentConverter":
# pylint: disable=line-too-long
def _convert_tables_and_text(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]]) -> List[Document]:
"""
Converts the tables and text extracted by Azure's Document Intelligence service into Haystack Documents.
:param result: The AnalyzeResult object returned by the `begin_analyze_document` method. Docs on Analyze result
can be found [here](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-ai-formrecognizer/3.3.0/azure.ai.formrecognizer.html?highlight=read#azure.ai.formrecognizer.AnalyzeResult).
:param meta: Optional dictionary with metadata that shall be attached to all resulting documents.
Expand All @@ -188,6 +191,7 @@ def _convert_tables_and_text(self, result: "AnalyzeResult", meta: Optional[Dict[
def _convert_tables(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]]) -> List[Document]:
"""
Converts the tables extracted by Azure's Document Intelligence service into Haystack Documents.
:param result: The AnalyzeResult Azure object
:param meta: Optional dictionary with metadata that shall be attached to all resulting documents.
Expand Down Expand Up @@ -296,8 +300,10 @@ def _convert_tables(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]

def _convert_to_natural_text(self, result: "AnalyzeResult", meta: Optional[Dict[str, Any]]) -> Document:
"""
This converts the `AnalyzeResult` object into a single Document. We add "\f" separators between to
differentiate between the text on separate pages. This is the expected format for the PreProcessor.
This converts the `AnalyzeResult` object into a single document.
We add "\f" separators between pages to differentiate the text on separate pages. This is the expected format
for the PreProcessor.
:param result: The AnalyzeResult object returned by the `begin_analyze_document` method. Docs on Analyze result
can be found [here](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-ai-formrecognizer/3.3.0/azure.ai.formrecognizer.html?highlight=read#azure.ai.formrecognizer.AnalyzeResult).
Expand Down Expand Up @@ -340,8 +346,10 @@ def _convert_to_single_column_text(
self, result: "AnalyzeResult", meta: Optional[Dict[str, str]], threshold_y: float = 0.05
) -> Document:
"""
This converts the `AnalyzeResult` object into a single Haystack Document. We add "\f" separators between to
differentiate between the text on separate pages. This is the expected format for the PreProcessor.
This converts the `AnalyzeResult` object into a single Haystack Document.
We add "\f" separators between pages to differentiate the text on separate pages. This is the expected format
for the PreProcessor.
:param result: The AnalyzeResult object returned by the `begin_analyze_document` method. Docs on Analyze result
can be found [here](https://azuresdkdocs.blob.core.windows.net/$web/python/azure-ai-formrecognizer/3.3.0/azure.ai.formrecognizer.html?highlight=read#azure.ai.formrecognizer.AnalyzeResult).
Expand Down Expand Up @@ -427,6 +435,7 @@ def _convert_to_single_column_text(
def _collect_table_spans(self, result: "AnalyzeResult") -> Dict:
"""
Collect the spans of all tables by page number.
:param result: The AnalyzeResult object returned by the `begin_analyze_document` method.
:returns: A dictionary with the page number as key and a list of table spans as value.
"""
Expand All @@ -443,6 +452,7 @@ def _check_if_in_table(
) -> bool:
"""
Check if a line or paragraph is part of a table.
:param tables_on_page: A dictionary with the page number as key and a list of table spans as value.
:param line_or_paragraph: The line or paragraph to check.
:returns: True if the line or paragraph is part of a table, False otherwise.
Expand All @@ -457,7 +467,9 @@ def _check_if_in_table(

def _hash_dataframe(self, df: pd.DataFrame, desired_samples=5, hash_length=4) -> str:
"""
Returns a hash of the DataFrame content. The hash is based on the content of the DataFrame.
Returns a hash of the DataFrame content.
The hash is based on the content of the DataFrame.
:param df: The DataFrame to hash.
:param desired_samples: The desired number of samples to hash.
:param hash_length: The length of the hash for each sample.
Expand Down
4 changes: 4 additions & 0 deletions haystack/components/converters/openapi_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ def run(self, sources: List[Union[str, Path, ByteStream]]) -> Dict[str, Any]:

def _openapi_to_functions(self, service_openapi_spec: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
OpenAPI to OpenAI function conversion.
Extracts functions from the OpenAPI specification of the service and converts them into a format
suitable for OpenAI function calling.
Expand Down Expand Up @@ -188,6 +190,8 @@ def _parse_property_attributes(
self, property_schema: Dict[str, Any], include_attributes: Optional[List[str]] = None
) -> Dict[str, Any]:
"""
Parses the attributes of a property schema.
Recursively parses the attributes of a property schema, including nested objects and arrays,
and includes specified attributes like description, pattern, etc.
Expand Down
3 changes: 3 additions & 0 deletions haystack/components/converters/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
def get_bytestream_from_source(source: Union[str, Path, ByteStream]) -> ByteStream:
"""
Creates a ByteStream object from a source.
:param source: A source to convert to a ByteStream. Can be a string (path to a file), a Path object, or a ByteStream.
:return: A ByteStream object.
"""
Expand All @@ -24,6 +25,8 @@ def normalize_metadata(
meta: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]], sources_count: int
) -> List[Dict[str, Any]]:
"""
Normalize the metadata input for a converter.
Given all the possible values of the meta input for a converter (None, dictionary or list of dicts),
makes sure to return a list of dictionaries of the correct length for the converter to use.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
@component
class HuggingFaceAPIDocumentEmbedder:
"""
A component that embeds documents using Hugging Face APIs.
This component can be used to compute Document embeddings using different Hugging Face APIs:
- [Free Serverless Inference API](https://huggingface.co/inference-api)
- [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
@component
class HuggingFaceAPITextEmbedder:
"""
A component that embeds text using Hugging Face APIs.
This component can be used to embed strings using different Hugging Face APIs:
- [Free Serverless Inference API](https://huggingface.co/inference-api)
- [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)
Expand Down
13 changes: 8 additions & 5 deletions haystack/components/evaluators/answer_exact_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
@component
class AnswerExactMatchEvaluator:
"""
Evaluator that checks if predicted answers exactly match ground truth answers.
An answer exact match evaluator class.
The evaluator that checks if the predicted answers matches any of the ground truth answers exactly.
The result is a number from 0.0 to 1.0, it represents the proportion of predicted answers
that matched one of the ground truth answers.
There can be multiple ground truth answers and multiple predicted answers as input.
Each predicted answer is compared to one ground truth answer.
The final score is a number ranging from 0.0 to 1.0.
It represents the proportion of predicted answers that match their corresponding ground truth answer.
Usage example:
```python
Expand All @@ -33,7 +35,8 @@ class AnswerExactMatchEvaluator:
def run(self, ground_truth_answers: List[str], predicted_answers: List[str]) -> Dict[str, Any]:
"""
Run the AnswerExactMatchEvaluator on the given inputs.
`ground_truth_answers` and `retrieved_answers` must have the same length.
The `ground_truth_answers` and `predicted_answers` must have the same length.
:param ground_truth_answers:
A list of expected answers.
Expand Down
5 changes: 4 additions & 1 deletion haystack/components/evaluators/document_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
@component
class DocumentMAPEvaluator:
"""
A Mean Average Precision (MAP) evaluator for documents.
Evaluator that calculates the mean average precision of the retrieved documents, a metric
that measures how high retrieved documents are ranked.
Each question can have multiple ground truth documents and multiple retrieved documents.
Expand Down Expand Up @@ -43,6 +45,7 @@ def run(
) -> Dict[str, Any]:
"""
Run the DocumentMAPEvaluator on the given inputs.
All lists must have the same length.
:param ground_truth_documents:
Expand All @@ -52,7 +55,7 @@ def run(
:returns:
A dictionary with the following outputs:
- `score` - The average of calculated scores.
- `invididual_scores` - A list of numbers from 0.0 to 1.0 that represents how high retrieved documents are ranked.
- `individual_scores` - A list of numbers from 0.0 to 1.0 that represents how high retrieved documents are ranked.
"""
if len(ground_truth_documents) != len(retrieved_documents):
msg = "The length of ground_truth_documents and retrieved_documents must be the same."
Expand Down
2 changes: 2 additions & 0 deletions haystack/components/evaluators/document_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def from_str(string: str) -> "RecallMode":
class DocumentRecallEvaluator:
"""
Evaluator that calculates the Recall score for a list of documents.
Returns both a list of scores for each question and the average.
There can be multiple ground truth documents and multiple predicted documents as input.
Expand Down Expand Up @@ -91,6 +92,7 @@ def run(
) -> Dict[str, Any]:
"""
Run the DocumentRecallEvaluator on the given inputs.
`ground_truth_documents` and `retrieved_documents` must have the same length.
:param ground_truth_documents:
Expand Down
Loading

0 comments on commit be983ac

Please sign in to comment.