diff --git a/.github/utils/generate_json_schema.py b/.github/utils/generate_json_schema.py index 3ecc311cd4..023a16a309 100644 --- a/.github/utils/generate_json_schema.py +++ b/.github/utils/generate_json_schema.py @@ -8,6 +8,4 @@ sys.path.append(".") from haystack.nodes._json_schema import update_json_schema -update_json_schema( - update_index=True, destination_path=Path(__file__).parent.parent.parent / "haystack" / "json-schemas" -) +update_json_schema(destination_path=Path(__file__).parent.parent.parent / "haystack" / "json-schemas") diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md index 0cce6fc5d0..1e44e16482 100644 --- a/docs/_src/api/api/pipelines.md +++ b/docs/_src/api/api/pipelines.md @@ -83,7 +83,7 @@ Default value is True. ```python @classmethod @abstractmethod -def load_from_config(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True) +def load_from_config(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, strict_version_check: bool = False) ``` Load Pipeline from a config dict defining the individual components and how they're tied together to form @@ -132,6 +132,7 @@ Here's a sample configuration: to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. +- `strict_version_check`: whether to fail in case of a version mismatch (throws a warning otherwise) @@ -561,7 +562,7 @@ Create a Graphviz visualization of the pipeline. 
```python @classmethod -def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True) +def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, strict_version_check: bool = False) ``` Load Pipeline from a YAML file defining the individual components and how they're tied together to form @@ -610,6 +611,7 @@ If the pipeline loads correctly regardless, save again the pipeline using `Pipel to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. +- `strict_version_check`: whether to fail in case of a version mismatch (throws a warning otherwise) @@ -617,7 +619,7 @@ variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an ```python @classmethod -def load_from_config(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True) +def load_from_config(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, strict_version_check: bool = False) ``` Load Pipeline from a config dict defining the individual components and how they're tied together to form @@ -666,6 +668,7 @@ Here's a sample configuration: to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. +- `strict_version_check`: whether to fail in case of a version mismatch (throws a warning otherwise). 
@@ -769,7 +772,7 @@ def __init__(address: str = None, **kwargs) ```python @classmethod -def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, address: Optional[str] = None, **kwargs, ,) +def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, address: Optional[str] = None, strict_version_check: bool = False, **kwargs) ``` Load Pipeline from a YAML file defining the individual components and how they're tied together to form diff --git a/docs/_src/tutorials/tutorials/8.md b/docs/_src/tutorials/tutorials/8.md index 1480272354..5e56342818 100644 --- a/docs/_src/tutorials/tutorials/8.md +++ b/docs/_src/tutorials/tutorials/8.md @@ -37,7 +37,7 @@ This tutorial will show you all the tools that Haystack provides to help you cas !pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab,ocr] # For Colab/linux based machines -!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz -!tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin +!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz +!tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin # For Macos machines diff --git a/haystack/json-schemas/haystack-pipeline-1.0.0.schema.json b/haystack/json-schemas/haystack-pipeline-1.0.0.schema.json index 6524ed657c..6362d915a6 100644 --- a/haystack/json-schemas/haystack-pipeline-1.0.0.schema.json +++ b/haystack/json-schemas/haystack-pipeline-1.0.0.schema.json @@ -9,17 +9,7 @@ "title": "Version", "description": "Version of the Haystack Pipeline file.", "type": "string", - "oneOf": [ - { - "const": "1.0.0" - }, - { - "const": "1.1.0" - }, - { - "const": "1.2.0" - } - ] + "const": "1.0.0" }, "components": { "title": "Components", diff --git a/haystack/json-schemas/haystack-pipeline-unstable.schema.json b/haystack/json-schemas/haystack-pipeline-1.1.0.schema.json similarity index 86% 
rename from haystack/json-schemas/haystack-pipeline-unstable.schema.json rename to haystack/json-schemas/haystack-pipeline-1.1.0.schema.json index c480fbf584..af50926314 100644 --- a/haystack/json-schemas/haystack-pipeline-unstable.schema.json +++ b/haystack/json-schemas/haystack-pipeline-1.1.0.schema.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://haystack.deepset.ai/haystack/json-schemas/haystack-pipeline-unstable.schema.json", + "$id": "https://haystack.deepset.ai/json-schemas/haystack-pipeline-1.1.0.schema.json", "title": "Haystack Pipeline", "description": "Haystack Pipeline YAML file describing the nodes of the pipelines. For more info read the docs at: https://haystack.deepset.ai/components/pipelines#yaml-file-definitions", "type": "object", @@ -9,14 +9,7 @@ "title": "Version", "description": "Version of the Haystack Pipeline file.", "type": "string", - "oneOf": [ - { - "const": "unstable" - }, - { - "const": "1.3.1rc0" - } - ] + "const": "1.1.0" }, "components": { "title": "Components", @@ -48,9 +41,6 @@ { "$ref": "#/definitions/OpenSearchDocumentStoreComponent" }, - { - "$ref": "#/definitions/PineconeDocumentStoreComponent" - }, { "$ref": "#/definitions/SQLDocumentStoreComponent" }, @@ -290,11 +280,6 @@ "title": "Return Embedding", "default": false, "type": "boolean" - }, - "label_index": { - "title": "Label Index", - "default": "default", - "type": "string" } }, "additionalProperties": false, @@ -464,13 +449,11 @@ }, "similarity": { "title": "Similarity", - "default": "dot_product", - "type": "string" + "default": "dot_product" }, "timeout": { "title": "Timeout", - "default": 30, - "type": "integer" + "default": 30 }, "return_embedding": { "title": "Return Embedding", @@ -622,21 +605,6 @@ "isolation_level": { "title": "Isolation Level", "type": "string" - }, - "n_links": { - "title": "N Links", - "default": 64, - "type": "integer" - }, - "ef_search": { - "title": "Ef Search", - "default": 20, - "type": 
"integer" - }, - "ef_construction": { - "title": "Ef Construction", - "default": 80, - "type": "integer" } }, "additionalProperties": false, @@ -833,504 +801,72 @@ "title": "Vector Dim", "type": "integer" }, - "embedding_dim": { - "title": "Embedding Dim", - "default": 768, - "type": "integer" - }, - "index_file_size": { - "title": "Index File Size", - "default": 1024, - "type": "integer" - }, - "similarity": { - "title": "Similarity", - "default": "dot_product", - "type": "string" - }, - "index_type": { - "title": "Index Type", - "default": "IVF_FLAT", - "type": "string" - }, - "index_param": { - "title": "Index Param", - "type": "object" - }, - "search_param": { - "title": "Search Param", - "type": "object" - }, - "return_embedding": { - "title": "Return Embedding", - "default": false, - "type": "boolean" - }, - "embedding_field": { - "title": "Embedding Field", - "default": "embedding", - "type": "string" - }, - "id_field": { - "title": "Id Field", - "default": "id", - "type": "string" - }, - "custom_fields": { - "title": "Custom Fields", - "type": "array", - "items": {} - }, - "progress_bar": { - "title": "Progress Bar", - "default": true, - "type": "boolean" - }, - "duplicate_documents": { - "title": "Duplicate Documents", - "default": "overwrite", - "type": "string" - }, - "isolation_level": { - "title": "Isolation Level", - "type": "string" - }, - "consistency_level": { - "title": "Consistency Level", - "default": 0, - "type": "integer" - } - }, - "additionalProperties": false, - "description": "Each parameter can reference other components defined in the same YAML file." - } - }, - "required": [ - "type", - "name" - ], - "additionalProperties": false - }, - "OpenDistroElasticsearchDocumentStoreComponent": { - "type": "object", - "properties": { - "name": { - "title": "Name", - "description": "Custom name for the component. 
Helpful for visualization and debugging.", - "type": "string" - }, - "type": { - "title": "Type", - "description": "Haystack Class name for the component.", - "type": "string", - "const": "OpenDistroElasticsearchDocumentStore" - }, - "params": { - "title": "Parameters", - "type": "object", - "properties": { - "scheme": { - "title": "Scheme", - "default": "https", - "type": "string" - }, - "username": { - "title": "Username", - "default": "admin", - "type": "string" - }, - "password": { - "title": "Password", - "default": "admin", - "type": "string" - }, - "host": { - "title": "Host", - "default": "localhost", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "port": { - "title": "Port", - "default": 9200, - "anyOf": [ - { - "type": "integer" - }, - { - "type": "array", - "items": { - "type": "integer" - } - } - ] - }, - "api_key_id": { - "title": "Api Key Id", - "type": "string" - }, - "api_key": { - "title": "Api Key", - "type": "string" - }, - "aws4auth": { - "title": "Aws4Auth" - }, - "index": { - "title": "Index", - "default": "document", - "type": "string" - }, - "label_index": { - "title": "Label Index", - "default": "label", - "type": "string" - }, - "search_fields": { - "title": "Search Fields", - "default": "content", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": {} - } - ] - }, - "content_field": { - "title": "Content Field", - "default": "content", - "type": "string" - }, - "name_field": { - "title": "Name Field", - "default": "name", - "type": "string" - }, - "embedding_field": { - "title": "Embedding Field", - "default": "embedding", - "type": "string" - }, - "embedding_dim": { - "title": "Embedding Dim", - "default": 768, - "type": "integer" - }, - "custom_mapping": { - "title": "Custom Mapping", - "type": "object" - }, - "excluded_meta_data": { - "title": "Excluded Meta Data", - "type": "array", - "items": {} - }, - "analyzer": { - "title": "Analyzer", - 
"default": "standard", - "type": "string" - }, - "ca_certs": { - "title": "Ca Certs", - "type": "string" - }, - "verify_certs": { - "title": "Verify Certs", - "default": false, - "type": "boolean" - }, - "recreate_index": { - "title": "Recreate Index", - "default": false, - "type": "boolean" - }, - "create_index": { - "title": "Create Index", - "default": true, - "type": "boolean" - }, - "refresh_type": { - "title": "Refresh Type", - "default": "wait_for", - "type": "string" - }, - "similarity": { - "title": "Similarity", - "default": "cosine", - "type": "string" - }, - "timeout": { - "title": "Timeout", - "default": 30, - "type": "integer" - }, - "return_embedding": { - "title": "Return Embedding", - "default": false, - "type": "boolean" - }, - "duplicate_documents": { - "title": "Duplicate Documents", - "default": "overwrite", - "type": "string" - }, - "index_type": { - "title": "Index Type", - "default": "flat", - "type": "string" - }, - "scroll": { - "title": "Scroll", - "default": "1d", - "type": "string" - }, - "skip_missing_embeddings": { - "title": "Skip Missing Embeddings", - "default": true, - "type": "boolean" - }, - "synonyms": { - "title": "Synonyms", - "type": "array", - "items": {} - }, - "synonym_type": { - "title": "Synonym Type", - "default": "synonym", - "type": "string" - }, - "use_system_proxy": { - "title": "Use System Proxy", - "default": false, - "type": "boolean" - } - }, - "additionalProperties": false, - "description": "Each parameter can reference other components defined in the same YAML file." - } - }, - "required": [ - "type", - "name" - ], - "additionalProperties": false - }, - "OpenSearchDocumentStoreComponent": { - "type": "object", - "properties": { - "name": { - "title": "Name", - "description": "Custom name for the component. 
Helpful for visualization and debugging.", - "type": "string" - }, - "type": { - "title": "Type", - "description": "Haystack Class name for the component.", - "type": "string", - "const": "OpenSearchDocumentStore" - }, - "params": { - "title": "Parameters", - "type": "object", - "properties": { - "scheme": { - "title": "Scheme", - "default": "https", - "type": "string" - }, - "username": { - "title": "Username", - "default": "admin", - "type": "string" - }, - "password": { - "title": "Password", - "default": "admin", - "type": "string" - }, - "host": { - "title": "Host", - "default": "localhost", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "port": { - "title": "Port", - "default": 9200, - "anyOf": [ - { - "type": "integer" - }, - { - "type": "array", - "items": { - "type": "integer" - } - } - ] - }, - "api_key_id": { - "title": "Api Key Id", - "type": "string" - }, - "api_key": { - "title": "Api Key", - "type": "string" - }, - "aws4auth": { - "title": "Aws4Auth" - }, - "index": { - "title": "Index", - "default": "document", - "type": "string" - }, - "label_index": { - "title": "Label Index", - "default": "label", - "type": "string" - }, - "search_fields": { - "title": "Search Fields", - "default": "content", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": {} - } - ] - }, - "content_field": { - "title": "Content Field", - "default": "content", - "type": "string" - }, - "name_field": { - "title": "Name Field", - "default": "name", - "type": "string" - }, - "embedding_field": { - "title": "Embedding Field", - "default": "embedding", - "type": "string" - }, - "embedding_dim": { - "title": "Embedding Dim", - "default": 768, - "type": "integer" - }, - "custom_mapping": { - "title": "Custom Mapping", - "type": "object" - }, - "excluded_meta_data": { - "title": "Excluded Meta Data", - "type": "array", - "items": {} - }, - "analyzer": { - "title": "Analyzer", - "default": 
"standard", - "type": "string" - }, - "ca_certs": { - "title": "Ca Certs", - "type": "string" - }, - "verify_certs": { - "title": "Verify Certs", - "default": false, - "type": "boolean" - }, - "recreate_index": { - "title": "Recreate Index", - "default": false, - "type": "boolean" - }, - "create_index": { - "title": "Create Index", - "default": true, - "type": "boolean" + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" }, - "refresh_type": { - "title": "Refresh Type", - "default": "wait_for", - "type": "string" + "index_file_size": { + "title": "Index File Size", + "default": 1024, + "type": "integer" }, "similarity": { "title": "Similarity", "default": "dot_product", "type": "string" }, - "timeout": { - "title": "Timeout", - "default": 30, - "type": "integer" + "index_type": { + "title": "Index Type", + "default": "IVF_FLAT", + "type": "string" + }, + "index_param": { + "title": "Index Param", + "type": "object" + }, + "search_param": { + "title": "Search Param", + "type": "object" }, "return_embedding": { "title": "Return Embedding", "default": false, "type": "boolean" }, - "duplicate_documents": { - "title": "Duplicate Documents", - "default": "overwrite", + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", "type": "string" }, - "index_type": { - "title": "Index Type", - "default": "flat", + "id_field": { + "title": "Id Field", + "default": "id", "type": "string" }, - "scroll": { - "title": "Scroll", - "default": "1d", - "type": "string" + "custom_fields": { + "title": "Custom Fields", + "type": "array", + "items": {} }, - "skip_missing_embeddings": { - "title": "Skip Missing Embeddings", + "progress_bar": { + "title": "Progress Bar", "default": true, "type": "boolean" }, - "synonyms": { - "title": "Synonyms", - "type": "array", - "items": {} + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" }, - "synonym_type": { - "title": "Synonym Type", - 
"default": "synonym", + "isolation_level": { + "title": "Isolation Level", "type": "string" }, - "use_system_proxy": { - "title": "Use System Proxy", - "default": false, - "type": "boolean" + "consistency_level": { + "title": "Consistency Level", + "default": 0, + "type": "integer" } }, "additionalProperties": false, @@ -1343,7 +879,7 @@ ], "additionalProperties": false }, - "PineconeDocumentStoreComponent": { + "OpenDistroElasticsearchDocumentStoreComponent": { "type": "object", "properties": { "name": { @@ -1355,80 +891,70 @@ "title": "Type", "description": "Haystack Class name for the component.", "type": "string", - "const": "PineconeDocumentStore" + "const": "OpenDistroElasticsearchDocumentStore" }, "params": { "title": "Parameters", "type": "object", "properties": { - "api_key": { - "title": "Api Key", - "type": "string" - }, - "environment": { - "title": "Environment", - "default": "us-west1-gcp", - "type": "string" - }, - "sql_url": { - "title": "Sql Url", - "default": "sqlite:///pinecone_document_store.db", - "type": "string" - }, - "pinecone_index": { - "title": "Pinecone Index", - "type": "string", - "default": null - }, - "embedding_dim": { - "title": "Embedding Dim", - "default": 768, - "type": "integer" - }, - "return_embedding": { - "title": "Return Embedding", - "default": false, - "type": "boolean" - }, - "index": { - "title": "Index", - "default": "document", - "type": "string" + "host": { + "title": "Host", + "default": "https://admin:admin@localhost:9200/" }, "similarity": { "title": "Similarity", - "default": "cosine", - "type": "string" - }, - "replicas": { - "title": "Replicas", - "default": 1, - "type": "integer" + "default": "cosine" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "OpenSearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "OpenSearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "verify_certs": { + "title": "Verify Certs", + "default": false }, - "shards": { - "title": "Shards", - "default": 1, - "type": "integer" + "scheme": { + "title": "Scheme", + "default": "https" }, - "embedding_field": { - "title": "Embedding Field", - "default": "embedding", - "type": "string" + "username": { + "title": "Username", + "default": "admin" }, - "progress_bar": { - "title": "Progress Bar", - "default": true, - "type": "boolean" + "password": { + "title": "Password", + "default": "admin" }, - "duplicate_documents": { - "title": "Duplicate Documents", - "default": "overwrite", - "type": "string" + "port": { + "title": "Port", + "default": 9200 } }, - "required": [ - "api_key" - ], "additionalProperties": false, "description": "Each parameter can reference other components defined in the same YAML file." 
} @@ -1684,13 +1210,6 @@ "title": "Merge Multiple Column Headers", "default": true, "type": "boolean" - }, - "id_hash_keys": { - "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } } }, "required": [ @@ -1749,13 +1268,6 @@ "overwrite_existing_files": { "title": "Overwrite Existing Files", "default": true - }, - "id_hash_keys": { - "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } } }, "required": [ @@ -1883,6 +1395,9 @@ "type": "array", "items": { "anyOf": [ + { + "type": "integer" + }, { "type": "string" }, @@ -1974,13 +1489,6 @@ "items": { "type": "string" } - }, - "id_hash_keys": { - "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } } }, "additionalProperties": false, @@ -2020,11 +1528,6 @@ "default": 10, "type": "integer" }, - "all_terms_must_match": { - "title": "All Terms Must Match", - "default": false, - "type": "boolean" - }, "custom_query": { "title": "Custom Query", "type": "string" @@ -2070,11 +1573,6 @@ "default": 10, "type": "integer" }, - "all_terms_must_match": { - "title": "All Terms Must Match", - "default": false, - "type": "boolean" - }, "custom_query": { "title": "Custom Query", "type": "string" @@ -2168,6 +1666,9 @@ "type": "array", "items": { "anyOf": [ + { + "type": "integer" + }, { "type": "string" }, @@ -2375,14 +1876,6 @@ "default": true, "type": "boolean" }, - "devices": { - "title": "Devices", - "default": [], - "type": "array", - "items": { - "type": "string" - } - }, "no_ans_boost": { "title": "No Ans Boost", "default": 0.0, @@ -2437,10 +1930,6 @@ "default": true, "type": "boolean" }, - "confidence_threshold": { - "title": "Confidence Threshold", - "type": "number" - }, "proxies": { "title": "Proxies", "type": "object", @@ -2556,13 +2045,6 @@ "items": { "type": "string" } - }, - "id_hash_keys": { - "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } } }, "additionalProperties": false, @@ -2694,13 +2176,6 @@ "items": { 
"type": "string" } - }, - "id_hash_keys": { - "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } } }, "additionalProperties": false, @@ -2742,13 +2217,6 @@ "items": { "type": "string" } - }, - "id_hash_keys": { - "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } } }, "additionalProperties": false, @@ -2793,13 +2261,6 @@ "items": { "type": "string" } - }, - "id_hash_keys": { - "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } } }, "additionalProperties": false, @@ -2884,13 +2345,6 @@ "items": { "type": "string" } - }, - "id_hash_keys": { - "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } } }, "additionalProperties": false, @@ -2968,13 +2422,6 @@ "title": "Language", "default": "en", "type": "string" - }, - "id_hash_keys": { - "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } } }, "additionalProperties": false, @@ -3092,9 +2539,14 @@ "default": null }, "generator_type": { - "title": "Generator Type", - "default": "token", - "type": "string" + "default": [ + 1 + ], + "allOf": [ + { + "$ref": "#/definitions/RAGeneratorType" + } + ] }, "top_k": { "title": "Top K", @@ -3141,6 +2593,16 @@ ], "additionalProperties": false }, + "RAGeneratorType": { + "title": "RAGeneratorType", + "description": "An enumeration.", + "enum": [ + [ + 1 + ], + 2 + ] + }, "RCIReaderComponent": { "type": "object", "properties": { @@ -3301,6 +2763,9 @@ "type": "array", "items": { "anyOf": [ + { + "type": "integer" + }, { "type": "string" }, @@ -3643,6 +3108,9 @@ "type": "array", "items": { "anyOf": [ + { + "type": "integer" + }, { "type": "string" }, @@ -3750,13 +3218,6 @@ "items": { "type": "string" } - }, - "id_hash_keys": { - "title": "Id Hash Keys", - "type": "array", - "items": { - "type": "string" - } } }, "additionalProperties": false, @@ -3848,13 +3309,6 @@ "items": { "type": "string" } - }, - "id_hash_keys": { - "title": "Id Hash Keys", - 
"type": "array", - "items": { - "type": "string" - } } }, "additionalProperties": false, diff --git a/haystack/json-schemas/haystack-pipeline-1.2.0.schema.json b/haystack/json-schemas/haystack-pipeline-1.2.0.schema.json new file mode 100644 index 0000000000..c6674beae9 --- /dev/null +++ b/haystack/json-schemas/haystack-pipeline-1.2.0.schema.json @@ -0,0 +1,3648 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://haystack.deepset.ai/json-schemas/haystack-pipeline-1.2.0.schema.json", + "title": "Haystack Pipeline", + "description": "Haystack Pipeline YAML file describing the nodes of the pipelines. For more info read the docs at: https://haystack.deepset.ai/components/pipelines#yaml-file-definitions", + "type": "object", + "properties": { + "version": { + "title": "Version", + "description": "Version of the Haystack Pipeline file.", + "type": "string", + "const": "1.2.0" + }, + "components": { + "title": "Components", + "description": "Component nodes and their configurations, to later be used in the pipelines section. 
Define here all the building blocks for the pipelines.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/DeepsetCloudDocumentStoreComponent" + }, + { + "$ref": "#/definitions/ElasticsearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/FAISSDocumentStoreComponent" + }, + { + "$ref": "#/definitions/GraphDBKnowledgeGraphComponent" + }, + { + "$ref": "#/definitions/InMemoryDocumentStoreComponent" + }, + { + "$ref": "#/definitions/Milvus2DocumentStoreComponent" + }, + { + "$ref": "#/definitions/OpenDistroElasticsearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/OpenSearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/SQLDocumentStoreComponent" + }, + { + "$ref": "#/definitions/WeaviateDocumentStoreComponent" + }, + { + "$ref": "#/definitions/AzureConverterComponent" + }, + { + "$ref": "#/definitions/CrawlerComponent" + }, + { + "$ref": "#/definitions/DensePassageRetrieverComponent" + }, + { + "$ref": "#/definitions/Docs2AnswersComponent" + }, + { + "$ref": "#/definitions/DocxToTextConverterComponent" + }, + { + "$ref": "#/definitions/ElasticsearchFilterOnlyRetrieverComponent" + }, + { + "$ref": "#/definitions/ElasticsearchRetrieverComponent" + }, + { + "$ref": "#/definitions/EmbeddingRetrieverComponent" + }, + { + "$ref": "#/definitions/EntityExtractorComponent" + }, + { + "$ref": "#/definitions/EvalAnswersComponent" + }, + { + "$ref": "#/definitions/EvalDocumentsComponent" + }, + { + "$ref": "#/definitions/FARMReaderComponent" + }, + { + "$ref": "#/definitions/FileTypeClassifierComponent" + }, + { + "$ref": "#/definitions/ImageToTextConverterComponent" + }, + { + "$ref": "#/definitions/JoinAnswersComponent" + }, + { + "$ref": "#/definitions/JoinDocumentsComponent" + }, + { + "$ref": "#/definitions/MarkdownConverterComponent" + }, + { + "$ref": "#/definitions/PDFToTextConverterComponent" + }, + { + "$ref": "#/definitions/PDFToTextOCRConverterComponent" + }, + { + "$ref": "#/definitions/ParsrConverterComponent" 
+ }, + { + "$ref": "#/definitions/PreProcessorComponent" + }, + { + "$ref": "#/definitions/QuestionGeneratorComponent" + }, + { + "$ref": "#/definitions/RAGeneratorComponent" + }, + { + "$ref": "#/definitions/RCIReaderComponent" + }, + { + "$ref": "#/definitions/RouteDocumentsComponent" + }, + { + "$ref": "#/definitions/SentenceTransformersRankerComponent" + }, + { + "$ref": "#/definitions/Seq2SeqGeneratorComponent" + }, + { + "$ref": "#/definitions/SklearnQueryClassifierComponent" + }, + { + "$ref": "#/definitions/TableReaderComponent" + }, + { + "$ref": "#/definitions/TableTextRetrieverComponent" + }, + { + "$ref": "#/definitions/Text2SparqlRetrieverComponent" + }, + { + "$ref": "#/definitions/TextConverterComponent" + }, + { + "$ref": "#/definitions/TfidfRetrieverComponent" + }, + { + "$ref": "#/definitions/TikaConverterComponent" + }, + { + "$ref": "#/definitions/TransformersDocumentClassifierComponent" + }, + { + "$ref": "#/definitions/TransformersQueryClassifierComponent" + }, + { + "$ref": "#/definitions/TransformersReaderComponent" + }, + { + "$ref": "#/definitions/TransformersSummarizerComponent" + }, + { + "$ref": "#/definitions/TransformersTranslatorComponent" + } + ] + }, + "required": [ + "type", + "name" + ], + "additionalProperties": true + }, + "pipelines": { + "title": "Pipelines", + "description": "Multiple pipelines can be defined using the components from the same YAML file.", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Name of the pipeline.", + "type": "string" + }, + "nodes": { + "title": "Nodes", + "description": "Nodes to be used by this particular pipeline", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The name of this particular node in the pipeline. 
This should be one of the names from the components defined in the same file.", + "type": "string" + }, + "inputs": { + "title": "Inputs", + "description": "Input parameters for this node.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "name", + "inputs" + ], + "additionalProperties": false + }, + "required": [ + "name", + "nodes" + ], + "additionalProperties": false + }, + "additionalProperties": false + }, + "additionalProperties": false + } + } + }, + "required": [ + "version", + "components", + "pipelines" + ], + "additionalProperties": false, + "definitions": { + "DeepsetCloudDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DeepsetCloudDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "api_key": { + "title": "Api Key", + "type": "string" + }, + "workspace": { + "title": "Workspace", + "default": "default", + "type": "string" + }, + "index": { + "title": "Index", + "default": "default", + "type": "string" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "api_endpoint": { + "title": "Api Endpoint", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "localhost", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "port": { + "title": "Port", + "default": 9200, + "anyOf": [ + { + "type": "integer" + }, + { + "type": "array", + "items": { + "type": "integer" + } + } + ] + }, + "username": { + "title": "Username", + "default": "", + "type": "string" + }, + "password": { + "title": "Password", + "default": "", + "type": "string" + }, + "api_key_id": { + "title": "Api Key Id", + "type": "string" + }, + "api_key": { + "title": "Api Key", + "type": "string" + }, + "aws4auth": { + "title": "Aws4Auth" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "search_fields": { + "title": "Search Fields", + "default": "content", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": {} + } + ] + }, + "content_field": { + "title": "Content Field", + "default": "content", + "type": "string" + }, + "name_field": { + "title": "Name Field", + "default": "name", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "custom_mapping": { + "title": "Custom Mapping", + "type": "object" 
+ }, + "excluded_meta_data": { + "title": "Excluded Meta Data", + "type": "array", + "items": {} + }, + "analyzer": { + "title": "Analyzer", + "default": "standard", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "http", + "type": "string" + }, + "ca_certs": { + "title": "Ca Certs", + "type": "string" + }, + "verify_certs": { + "title": "Verify Certs", + "default": true, + "type": "boolean" + }, + "recreate_index": { + "title": "Recreate Index", + "default": false, + "type": "boolean" + }, + "create_index": { + "title": "Create Index", + "default": true, + "type": "boolean" + }, + "refresh_type": { + "title": "Refresh Type", + "default": "wait_for", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product" + }, + "timeout": { + "title": "Timeout", + "default": 30 + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "flat", + "type": "string" + }, + "scroll": { + "title": "Scroll", + "default": "1d", + "type": "string" + }, + "skip_missing_embeddings": { + "title": "Skip Missing Embeddings", + "default": true, + "type": "boolean" + }, + "synonyms": { + "title": "Synonyms", + "type": "array", + "items": {} + }, + "synonym_type": { + "title": "Synonym Type", + "default": "synonym", + "type": "string" + }, + "use_system_proxy": { + "title": "Use System Proxy", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FAISSDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FAISSDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "sql_url": { + "title": "Sql Url", + "default": "sqlite:///faiss_document_store.db", + "type": "string" + }, + "vector_dim": { + "title": "Vector Dim", + "type": "integer" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "faiss_index_factory_str": { + "title": "Faiss Index Factory Str", + "default": "Flat", + "type": "string" + }, + "faiss_index": { + "title": "Faiss Index", + "type": "string", + "default": null + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "faiss_index_path": { + "title": "Faiss Index Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "faiss_config_path": { + "title": "Faiss Config Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "GraphDBKnowledgeGraphComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "GraphDBKnowledgeGraph" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "localhost", + "type": "string" + }, + "port": { + "title": "Port", + "default": 7200, + "type": "integer" + }, + "username": { + "title": "Username", + "default": "", + "type": "string" + }, + "password": { + "title": "Password", + "default": "", + "type": "string" + }, + "index": { + "title": "Index", + "type": "string" + }, + "prefixes": { + "title": "Prefixes", + "default": "", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "InMemoryDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "InMemoryDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "scoring_batch_size": { + "title": "Scoring Batch Size", + "default": 500000, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Milvus2DocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Milvus2DocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "sql_url": { + "title": "Sql Url", + "default": "sqlite:///", + "type": "string" + }, + "host": { + "title": "Host", + "default": "localhost", + "type": "string" + }, + "port": { + "title": "Port", + "default": "19530", + "type": "string" + }, + "connection_pool": { + "title": "Connection Pool", + "default": "SingletonThread", + "type": "string" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "vector_dim": { + "title": "Vector Dim", + "type": "integer" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "index_file_size": { + "title": "Index File Size", + "default": 1024, + "type": "integer" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "IVF_FLAT", + "type": "string" + }, + "index_param": { + "title": "Index Param", + "type": "object" + }, + "search_param": { + "title": "Search Param", + "type": "object" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "id_field": { + "title": "Id Field", + "default": "id", + "type": "string" + }, + "custom_fields": { + "title": "Custom Fields", + "type": "array", + "items": {} + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + }, + "consistency_level": { + 
"title": "Consistency Level", + "default": 0, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "OpenDistroElasticsearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "OpenDistroElasticsearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "https://admin:admin@localhost:9200/" + }, + "similarity": { + "title": "Similarity", + "default": "cosine" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "OpenSearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "OpenSearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "verify_certs": { + "title": "Verify Certs", + "default": false + }, + "scheme": { + "title": "Scheme", + "default": "https" + }, + "username": { + "title": "Username", + "default": "admin" + }, + "password": { + "title": "Password", + "default": "admin" + }, + "port": { + "title": "Port", + "default": 9200 + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SQLDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SQLDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "url": { + "title": "Url", + "default": "sqlite://", + "type": "string" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "check_same_thread": { + "title": "Check Same Thread", + "default": false, + "type": "boolean" + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "WeaviateDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "WeaviateDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "http://localhost", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "port": { + "title": "Port", + "default": 8080, + "anyOf": [ + { + "type": "integer" + }, + { + "type": "array", + "items": { + "type": "integer" + } + } + ] + }, + "timeout_config": { + "title": "Timeout Config", + "default": [ + 5, + 15 + ], + "type": "array", + "items": {} + }, + "username": { + "title": "Username", + "type": "string" + }, + "password": { + "title": "Password", + "type": "string" + }, + "index": { + "title": "Index", + "default": "Document", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "content_field": { + "title": "Content Field", + "default": "content", + "type": "string" + }, + "name_field": { + "title": "Name Field", + "default": "name", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "cosine", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "hnsw", + "type": "string" + }, + "custom_schema": { + "title": "Custom Schema", + "type": "object" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": 
true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "AzureConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "AzureConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "endpoint": { + "title": "Endpoint", + "type": "string" + }, + "credential_key": { + "title": "Credential Key", + "type": "string" + }, + "model_id": { + "title": "Model Id", + "default": "prebuilt-document", + "type": "string" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + }, + "save_json": { + "title": "Save Json", + "default": false, + "type": "boolean" + }, + "preceding_context_len": { + "title": "Preceding Context Len", + "default": 3, + "type": "integer" + }, + "following_context_len": { + "title": "Following Context Len", + "default": 3, + "type": "integer" + }, + "merge_multiple_column_headers": { + "title": "Merge Multiple Column Headers", + "default": true, + "type": "boolean" + } + }, + "required": [ + "endpoint", + "credential_key" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "CrawlerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Crawler" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "output_dir": { + "title": "Output Dir", + "type": "string" + }, + "urls": { + "title": "Urls", + "type": "array", + "items": { + "type": "string" + } + }, + "crawler_depth": { + "title": "Crawler Depth", + "default": 1, + "type": "integer" + }, + "filter_urls": { + "title": "Filter Urls", + "type": "array", + "items": {} + }, + "overwrite_existing_files": { + "title": "Overwrite Existing Files", + "default": true + } + }, + "required": [ + "output_dir" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "DensePassageRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DensePassageRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "query_embedding_model": { + "title": "Query Embedding Model", + "default": "facebook/dpr-question_encoder-single-nq-base", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "passage_embedding_model": { + "title": "Passage Embedding Model", + "default": "facebook/dpr-ctx_encoder-single-nq-base", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "max_seq_len_query": { + "title": "Max Seq Len Query", + "default": 64, + "type": "integer" + }, + "max_seq_len_passage": { + "title": "Max Seq Len Passage", + "default": 256, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 16, + "type": "integer" + }, + "embed_title": { + "title": "Embed Title", + "default": true, + "type": "boolean" + }, + "use_fast_tokenizers": { + "title": "Use Fast Tokenizers", + "default": true, + "type": "boolean" + }, + "infer_tokenizer_classes": { + "title": "Infer Tokenizer Classes", + "default": false, + "type": "boolean" + }, + "similarity_function": { + "title": "Similarity Function", + "default": "dot_product", + "type": "string" + }, + "global_loss_buffer_size": { + "title": "Global Loss Buffer Size", + "default": 150000, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + 
"type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Docs2AnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Docs2Answers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": {}, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "DocxToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DocxToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchFilterOnlyRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchFilterOnlyRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "custom_query": { + "title": "Custom Query", + "type": "string" + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "custom_query": { + "title": "Custom Query", + "type": "string" + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EmbeddingRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EmbeddingRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "embedding_model": { + "title": "Embedding Model", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 32, + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 512, + "type": "integer" + }, + "model_format": { + "title": "Model Format", + "default": "farm", + "type": "string" + }, + "pooling_strategy": { + "title": "Pooling Strategy", + "default": "reduce_mean", + "type": "string" + }, + "emb_extraction_layer": { + "title": "Emb Extraction Layer", + "default": -1, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "document_store", + "embedding_model" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EntityExtractorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EntityExtractor" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "dslim/bert-base-NER", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EvalAnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EvalAnswers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "skip_incorrect_retrieval": { + "title": "Skip Incorrect Retrieval", + "default": true, + "type": "boolean" + }, + "open_domain": { + "title": "Open Domain", + "default": true, + "type": "boolean" + }, + "sas_model": { + "title": "Sas Model", + "type": "string" + }, + "debug": { + "title": "Debug", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EvalDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EvalDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "debug": { + "title": "Debug", + "default": false, + "type": "boolean" + }, + "open_domain": { + "title": "Open Domain", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FARMReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FARMReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "context_window_size": { + "title": "Context Window Size", + "default": 150, + "type": "integer" + }, + "batch_size": { + "title": "Batch Size", + "default": 50, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "no_ans_boost": { + "title": "No Ans Boost", + "default": 0.0, + "type": "number" + }, + "return_no_answer": { + "title": "Return No Answer", + "default": false, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 3, + "type": "integer" + }, + "top_k_per_sample": { + "title": "Top K Per Sample", + "default": 1, + "type": "integer" + }, + "num_processes": { + "title": "Num Processes", + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + }, + "doc_stride": { + "title": "Doc Stride", + "default": 128, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_filtering": { + "title": "Duplicate Filtering", + "default": 0, + "type": "integer" + }, + "use_confidence_scores": { + "title": "Use Confidence Scores", + "default": true, + "type": "boolean" + }, + "proxies": { + "title": "Proxies", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "local_files_only": { + "title": "Local Files Only", + "default": false + }, + "force_download": { + "title": "Force Download", + "default": false + }, + 
"use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FileTypeClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FileTypeClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "supported_types": { + "title": "Supported Types", + "default": [ + "txt", + "pdf", + "md", + "docx", + "html" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ImageToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ImageToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "default": [ + "eng" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "JoinAnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "JoinAnswers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "join_mode": { + "title": "Join Mode", + "default": "concatenate", + "type": "string" + }, + "weights": { + "title": "Weights", + "type": "array", + "items": { + "type": "number" + } + }, + "top_k_join": { + "title": "Top K Join", + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "JoinDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "JoinDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "join_mode": { + "title": "Join Mode", + "default": "concatenate", + "type": "string" + }, + "weights": { + "title": "Weights", + "type": "array", + "items": { + "type": "number" + } + }, + "top_k_join": { + "title": "Top K Join", + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "MarkdownConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "MarkdownConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PDFToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PDFToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PDFToTextOCRConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PDFToTextOCRConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "default": [ + "eng" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ParsrConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ParsrConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "parsr_url": { + "title": "Parsr Url", + "default": "http://localhost:3001", + "type": "string" + }, + "extractor": { + "title": "Extractor", + "default": "pdfminer", + "enum": [ + "pdfminer", + "pdfjs" + ], + "type": "string" + }, + "table_detection_mode": { + "title": "Table Detection Mode", + "default": "lattice", + "enum": [ + "lattice", + "stream" + ], + "type": "string" + }, + "preceding_context_len": { + "title": "Preceding Context Len", + "default": 3, + "type": "integer" + }, + "following_context_len": { + "title": "Following Context Len", + "default": 3, + "type": "integer" + }, + "remove_page_headers": { + "title": "Remove Page Headers", + "default": false, + "type": "boolean" + }, + "remove_page_footers": { + "title": "Remove Page Footers", + "default": false, + "type": "boolean" + }, + "remove_table_of_contents": { + "title": "Remove Table Of Contents", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PreProcessorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PreProcessor" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "clean_whitespace": { + "title": "Clean Whitespace", + "default": true, + "type": "boolean" + }, + "clean_header_footer": { + "title": "Clean Header Footer", + "default": false, + "type": "boolean" + }, + "clean_empty_lines": { + "title": "Clean Empty Lines", + "default": true, + "type": "boolean" + }, + "remove_substrings": { + "title": "Remove Substrings", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "split_by": { + "title": "Split By", + "default": "word", + "type": "string" + }, + "split_length": { + "title": "Split Length", + "default": 200, + "type": "integer" + }, + "split_overlap": { + "title": "Split Overlap", + "default": 0, + "type": "integer" + }, + "split_respect_sentence_boundary": { + "title": "Split Respect Sentence Boundary", + "default": true, + "type": "boolean" + }, + "language": { + "title": "Language", + "default": "en", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "QuestionGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "QuestionGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "valhalla/t5-base-e2e-qg" + }, + "model_version": { + "title": "Model Version" + }, + "num_beams": { + "title": "Num Beams", + "default": 4 + }, + "max_length": { + "title": "Max Length", + "default": 256 + }, + "no_repeat_ngram_size": { + "title": "No Repeat Ngram Size", + "default": 3 + }, + "length_penalty": { + "title": "Length Penalty", + "default": 1.5 + }, + "early_stopping": { + "title": "Early Stopping", + "default": true + }, + "split_length": { + "title": "Split Length", + "default": 50 + }, + "split_overlap": { + "title": "Split Overlap", + "default": 10 + }, + "use_gpu": { + "title": "Use Gpu", + "default": true + }, + "prompt": { + "title": "Prompt", + "default": "generate questions:" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RAGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RAGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "facebook/rag-token-nq", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "retriever": { + "title": "Retriever", + "type": "string", + "default": null + }, + "generator_type": { + "default": [ + 1 + ], + "allOf": [ + { + "$ref": "#/definitions/RAGeneratorType" + } + ] + }, + "top_k": { + "title": "Top K", + "default": 2, + "type": "integer" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 2, + "type": "integer" + }, + "num_beams": { + "title": "Num Beams", + "default": 2, + "type": "integer" + }, + "embed_title": { + "title": "Embed Title", + "default": true, + "type": "boolean" + }, + "prefix": { + "title": "Prefix", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RAGeneratorType": { + "title": "RAGeneratorType", + "description": "An enumeration.", + "enum": [ + [ + 1 + ], + 2 + ] + }, + "RCIReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RCIReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "row_model_name_or_path": { + "title": "Row Model Name Or Path", + "default": "michaelrglass/albert-base-rci-wikisql-row", + "type": "string" + }, + "column_model_name_or_path": { + "title": "Column Model Name Or Path", + "default": "michaelrglass/albert-base-rci-wikisql-col", + "type": "string" + }, + "row_model_version": { + "title": "Row Model Version", + "type": "string" + }, + "column_model_version": { + "title": "Column Model Version", + "type": "string" + }, + "row_tokenizer": { + "title": "Row Tokenizer", + "type": "string" + }, + "column_tokenizer": { + "title": "Column Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RouteDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RouteDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "split_by": { + "title": "Split By", + "default": "content_type", + "type": "string" + }, + "metadata_values": { + "title": "Metadata Values", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SentenceTransformersRankerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SentenceTransformersRanker" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Seq2SeqGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Seq2SeqGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "input_converter": { + "title": "Input Converter", + "type": "string", + "default": null + }, + "top_k": { + "title": "Top K", + "default": 1, + "type": "integer" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 2, + "type": "integer" + }, + "num_beams": { + "title": "Num Beams", + "default": 8, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SklearnQueryClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SklearnQueryClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "https://ext-models-haystack.s3.eu-central-1.amazonaws.com/gradboost_query_classifier/model.pickle", + "anyOf": [ + { + "type": "string" + }, + {} + ] + }, + "vectorizer_name_or_path": { + "title": "Vectorizer Name Or Path", + "default": "https://ext-models-haystack.s3.eu-central-1.amazonaws.com/gradboost_query_classifier/vectorizer.pickle", + "anyOf": [ + { + "type": "string" + }, + {} + ] + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TableReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TableReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "google/tapas-base-finetuned-wtq", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 3, + "type": "integer" + }, + "return_no_answer": { + "title": "Return No Answer", + "default": false, + "type": "boolean" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TableTextRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TableTextRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "query_embedding_model": { + "title": "Query Embedding Model", + "default": "deepset/bert-small-mm_retrieval-question_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "passage_embedding_model": { + "title": "Passage Embedding Model", + "default": "deepset/bert-small-mm_retrieval-passage_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "table_embedding_model": { + "title": "Table Embedding Model", + "default": "deepset/bert-small-mm_retrieval-table_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "max_seq_len_query": { + "title": "Max Seq Len Query", + "default": 64, + "type": "integer" + }, + "max_seq_len_passage": { + "title": "Max Seq Len Passage", + "default": 256, + "type": "integer" + }, + "max_seq_len_table": { + "title": "Max Seq Len Table", + "default": 256, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 16, + "type": "integer" + }, + "embed_meta_fields": { + "title": "Embed Meta Fields", + "default": [ + "name", + "section_title", + "caption" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "use_fast_tokenizers": { + "title": "Use Fast Tokenizers", + "default": true, + "type": "boolean" + }, + "infer_tokenizer_classes": { + "title": "Infer 
Tokenizer Classes", + "default": false, + "type": "boolean" + }, + "similarity_function": { + "title": "Similarity Function", + "default": "dot_product", + "type": "string" + }, + "global_loss_buffer_size": { + "title": "Global Loss Buffer Size", + "default": 150000, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Text2SparqlRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Text2SparqlRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "knowledge_graph": { + "title": "Knowledge Graph" + }, + "model_name_or_path": { + "title": "Model Name Or Path" + }, + "top_k": { + "title": "Top K", + "default": 1, + "type": "integer" + } + }, + "required": [ + "knowledge_graph", + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TfidfRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TfidfRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "auto_fit": { + "title": "Auto Fit", + "default": true + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TikaConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TikaConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "tika_url": { + "title": "Tika Url", + "default": "http://localhost:9998/tika", + "type": "string" + }, + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersDocumentClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersDocumentClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "bhadresh-savani/distilbert-base-uncased-emotion", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "return_all_scores": { + "title": "Return All Scores", + "default": false, + "type": "boolean" + }, + "task": { + "title": "Task", + "default": "text-classification", + "type": "string" + }, + "labels": { + "title": "Labels", + "type": "array", + "items": { + "type": "string" + } + }, + "batch_size": { + "title": "Batch Size", + "default": -1, + "type": "integer" + }, + "classification_field": { + "title": "Classification Field", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersQueryClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersQueryClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "shahrukhx01/bert-mini-finetune-question-detection", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "distilbert-base-uncased-distilled-squad", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "context_window_size": { + "title": "Context Window Size", + "default": 70, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 4, + "type": "integer" + }, + "return_no_answers": { + "title": "Return No Answers", + "default": true, + "type": "boolean" 
+ }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + }, + "doc_stride": { + "title": "Doc Stride", + "default": 128, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersSummarizerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersSummarizer" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "google/pegasus-xsum", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 5, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "clean_up_tokenization_spaces": { + "title": "Clean Up Tokenization Spaces", + "default": true, + "type": "boolean" + }, + "separator_for_single_summary": { + "title": "Separator For Single Summary", + "default": " ", + "type": "string" + }, + "generate_single_summary": { + "title": "Generate Single Summary", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersTranslatorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersTranslator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "tokenizer_name": { + "title": "Tokenizer Name", + "type": "string" + }, + "max_seq_len": { + "title": "Max Seq Len", + "type": "integer" + }, + "clean_up_tokenization_spaces": { + "title": "Clean Up Tokenization Spaces", + "default": true, + "type": "boolean" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + } + } +} \ No newline at end of file diff --git a/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json b/haystack/json-schemas/haystack-pipeline-1.3.0.schema.json similarity index 99% rename from haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json rename to haystack/json-schemas/haystack-pipeline-1.3.0.schema.json index f6d6cc955f..5b5f1bed0e 100644 --- a/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json +++ b/haystack/json-schemas/haystack-pipeline-1.3.0.schema.json @@ -9,14 +9,7 @@ "title": "Version", "description": "Version of the Haystack Pipeline file.", "type": "string", - "oneOf": [ - { - "const": "1.2.1rc0" - }, - { - "const": "1.3.0" - } - ] + "const": "1.3.0" }, "components": { "title": "Components", diff --git a/haystack/json-schemas/haystack-pipeline-1.3.1rc0.schema.json b/haystack/json-schemas/haystack-pipeline-master.schema.json similarity index 99% rename from haystack/json-schemas/haystack-pipeline-1.3.1rc0.schema.json rename to haystack/json-schemas/haystack-pipeline-master.schema.json index 73418303bf..78d8f20666 100644 --- a/haystack/json-schemas/haystack-pipeline-1.3.1rc0.schema.json +++ b/haystack/json-schemas/haystack-pipeline-master.schema.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://haystack.deepset.ai/haystack/json-schemas/haystack-pipeline-1.3.1rc0.schema.json", + "$id": "https://haystack.deepset.ai/haystack/json-schemas/haystack-pipeline-master.schema.json", "title": "Haystack Pipeline", "description": "Haystack Pipeline YAML file describing the nodes of the pipelines. 
For more info read the docs at: https://haystack.deepset.ai/components/pipelines#yaml-file-definitions", "type": "object", @@ -9,11 +9,7 @@ "title": "Version", "description": "Version of the Haystack Pipeline file.", "type": "string", - "oneOf": [ - { - "const": "1.3.1rc0" - } - ] + "const": "ignore" }, "components": { "title": "Components", diff --git a/haystack/json-schemas/haystack-pipeline.schema.json b/haystack/json-schemas/haystack-pipeline.schema.json index 5418bc1186..4ae84b3e8a 100644 --- a/haystack/json-schemas/haystack-pipeline.schema.json +++ b/haystack/json-schemas/haystack-pipeline.schema.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://haystack.deepset.ai/json-schemas/haystack-pipeline-1.1.0.schema.json", + "$id": "https://haystack.deepset.ai/json-schemas/haystack-pipeline.schema.json", "title": "Haystack Pipeline", "description": "Haystack Pipeline YAML file describing the nodes of the pipelines. For more info read the docs at: https://haystack.deepset.ai/components/pipelines#yaml-file-definitions", "type": "object", @@ -10,16 +10,12 @@ { "properties": { "version": { - "oneOf": [ - { - "const": "unstable" - } - ] + "const": "ignore" } } }, { - "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-unstable.schema.json" + "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-master.schema.json" } ] }, @@ -28,17 +24,7 @@ { "properties": { "version": { - "oneOf": [ - { - "const": "1.0.0" - }, - { - "const": "1.1.0" - }, - { - "const": "1.2.0" - } - ] + "const": "1.0.0" } } }, @@ -52,19 +38,12 @@ { "properties": { "version": { - "oneOf": [ - { - "const": "1.2.1rc0" - }, - { - "const": "1.3.0" - } - ] + "const": "1.1.0" } } }, { - "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.2.1rc0.schema.json" + "$ref": 
"https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.1.0.schema.json" } ] }, @@ -73,16 +52,26 @@ { "properties": { "version": { - "oneOf": [ - { - "const": "1.3.1rc0" - } - ] + "const": "1.2.0" } } }, { - "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.3.1rc0.schema.json" + "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.2.0.schema.json" + } + ] + }, + { + "allOf": [ + { + "properties": { + "version": { + "const": "1.3.0" + } + } + }, + { + "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.3.0.schema.json" } ] } diff --git a/haystack/nodes/_json_schema.py b/haystack/nodes/_json_schema.py index 02de56af37..6338f811d4 100644 --- a/haystack/nodes/_json_schema.py +++ b/haystack/nodes/_json_schema.py @@ -1,15 +1,10 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type -import logging - -import os -import re import sys import json import inspect +import logging from pathlib import Path -from copy import deepcopy -from difflib import SequenceMatcher import pydantic.schema from pydantic import BaseConfig, BaseSettings, Required, SecretStr, create_model @@ -225,9 +220,7 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[ return component_schema, {"$ref": f"#/definitions/{component_name}"} -def get_json_schema( - filename: str, compatible_versions: List[str], modules: List[str] = ["haystack.document_stores", "haystack.nodes"] -): +def get_json_schema(filename: str, version: str, modules: List[str] = ["haystack.document_stores", "haystack.nodes"]): """ Generate JSON schema for Haystack pipelines. 
""" @@ -254,7 +247,7 @@ def get_json_schema( "title": "Version", "description": "Version of the Haystack Pipeline file.", "type": "string", - "oneOf": [{"const": version} for version in compatible_versions], + "const": version, }, "components": { "title": "Components", @@ -331,171 +324,38 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st return schema -def natural_sort(list_to_sort: List[str]) -> List[str]: - """Sorts a list keeping numbers in the correct numerical order""" - convert = lambda text: int(text) if text.isdigit() else text.lower() - alphanumeric_key = lambda key: [convert(c) for c in re.split("([0-9]+)", key)] - return sorted(list_to_sort, key=alphanumeric_key) - - -def load(path: Path) -> Dict[str, Any]: - """Shorthand for loading a JSON""" - with open(path, "r") as json_file: - return json.load(json_file) - - -def dump(data: Dict[str, Any], path: Path) -> None: - """Shorthand for dumping to JSON""" - with open(path, "w") as json_file: - json.dump(data, json_file, indent=2) - - -def new_version_entry(version): +def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH): """ - Returns a new entry for the version index JSON schema. + If the version contains "rc", only update master's schema. + Otherwise, create (or update) a new schema. 
""" - return { - "allOf": [ - {"properties": {"version": {"oneOf": [{"const": version}]}}}, - { - "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/" - f"haystack-pipeline-{version}.schema.json" - }, - ] - } - + # Update masters's schema + filename = f"haystack-pipeline-master.schema.json" + with open(destination_path / filename, "w") as json_file: + json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2) -def update_json_schema( - update_index: bool, destination_path: Path = JSON_SCHEMAS_PATH, index_name: str = "haystack-pipeline.schema.json" -): - # Locate the latest schema's path - latest_schema_path = destination_path / Path( - natural_sort(os.listdir(destination_path))[-3] - ) # -1 is index, -2 is unstable - logger.info(f"Latest schema: {latest_schema_path}") - latest_schema = load(latest_schema_path) - - # List the versions supported by the last schema - supported_versions_block = deepcopy(latest_schema["properties"]["version"]["oneOf"]) - supported_versions = [entry["const"].replace('"', "") for entry in supported_versions_block] - logger.info(f"Versions supported by this schema: {supported_versions}") - - # Create new schema with the same filename and versions embedded, to be identical to the latest one. - new_schema = get_json_schema(latest_schema_path.name, supported_versions) - - # Check for backwards compatibility with difflib's SequenceMatcher - # (https://docs.python.org/3/library/difflib.html#difflib.SequenceMatcher) - # If the opcodes contain only "insert" and "equal", that means the new schema - # only added lines and did not remove anything from the previous schema. - # We decided that additions only imply backwards compatibility. - # Any other opcode ("replace", "delete") imply that something has been removed - # in the new schema, which breaks backwards compatibility and means we should - # store a new, separate schema. 
- # People wishing to upgrade from the older schema version will have to change - # version in their YAML to avoid failing validation. - latest_schema_string = json.dumps(latest_schema) - new_schema_string = json.dumps(new_schema) - matcher = SequenceMatcher(None, latest_schema_string, new_schema_string) - schema_diff = matcher.get_opcodes() - is_backwards_incompatible = any(opcode[0] not in ["insert", "equal"] for opcode in schema_diff) - - unstable_versions_block = [] - - # If the two schemas are incompatible, we need a new file. - # Update the schema's filename and supported versions, then save it. - if is_backwards_incompatible: - - # Print a quick diff to explain the differences - logger.info(f"The schemas are NOT backwards compatible. This is the list of INCOMPATIBLE changes only:") - for tag, i1, i2, j1, j2 in schema_diff: - if tag not in ["equal", "insert"]: - logger.info("{!r:>8} --> {!r}".format(latest_schema_string[i1:i2], new_schema_string[j1:j2])) + # If it's not an rc version: + if "rc" not in haystack_version: + # Create/update the specific version file too filename = f"haystack-pipeline-{haystack_version}.schema.json" - logger.info(f"Adding {filename} to the schema folder.") - - # Let's check if the schema changed without a version change - if haystack_version in supported_versions and len(supported_versions) > 1: - logger.info( - f"Version {haystack_version} was supported by the latest schema" - f"(supported versions: {supported_versions}). " - f"Removing support for version {haystack_version} from it." 
- ) - - supported_versions_block = [ - entry for entry in supported_versions_block if entry["const"].replace('"', "") != haystack_version - ] - latest_schema["properties"]["version"]["oneOf"] = supported_versions_block - dump(latest_schema, latest_schema_path) - - # Update the JSON schema index too - if update_index: - index = load(destination_path / index_name) - index["oneOf"][-1]["allOf"][0]["properties"]["version"]["oneOf"] = supported_versions_block - dump(index, destination_path / index_name) - - # Dump the new schema file - new_schema["$id"] = f"{SCHEMA_URL}{filename}" - unstable_versions_block = [{"const": haystack_version}] - new_schema["properties"]["version"]["oneOf"] = [{"const": haystack_version}] - dump(new_schema, destination_path / filename) - logger.info(f"Schema saved in {destination_path / filename}") - - # Update schema index with a whole new entry - if update_index: - index = load(destination_path / index_name) - new_entry = new_version_entry(haystack_version) - if all(new_entry != entry for entry in index["oneOf"]): - index["oneOf"].append(new_version_entry(haystack_version)) - dump(index, destination_path / index_name) - - # If the two schemas are compatible, no need to write a new one: - # Just add the new version to the list of versions supported by - # the latest schema if it's not there yet - else: - - # Print a quick diff to explain the differences - if not schema_diff or all(tag[0] == "equal" for tag in schema_diff): - logger.info("The schemas are identical, won't create a new file.") - else: - logger.info("The schemas are backwards compatible, overwriting the latest schema.") - logger.info("This is the list of changes:") - for tag, i1, i2, j1, j2 in schema_diff: - if tag not in "equal": - logger.info("{!r:>8} --> {!r}".format(latest_schema_string[i1:i2], new_schema_string[j1:j2])) - - # Overwrite the latest schema (safe to do for additions) - dump(new_schema, latest_schema_path) - - if haystack_version in supported_versions: - 
unstable_versions_block = supported_versions_block - logger.info( - f"Version {haystack_version} was already supported " f"(supported versions: {supported_versions})" - ) - else: - logger.info( - f"This version ({haystack_version}) was not listed " - f"(supported versions: {supported_versions}): " - "updating the supported versions list." + with open(destination_path / filename, "w") as json_file: + json.dump(get_json_schema(filename=filename, version=haystack_version), json_file, indent=2) + + # Update the index + index_name = "haystack-pipeline.schema.json" + with open(destination_path / index_name, "r") as json_file: + index = json.load(json_file) + index["oneOf"].append( + { + "allOf": [ + {"properties": {"version": {"const": haystack_version}}}, + { + "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/" + f"haystack-pipeline-{haystack_version}.schema.json" + }, + ] + } ) - - # Updating the latest schema's list of supported versions - supported_versions_block.append({"const": haystack_version}) - unstable_versions_block = supported_versions_block - latest_schema["properties"]["version"]["oneOf"] = supported_versions_block - dump(latest_schema, latest_schema_path) - logger.info(f"Schema updated in {destination_path / latest_schema_path}") - - # Update the JSON schema index too - if update_index: - index = load(destination_path / index_name) - index["oneOf"][-1]["allOf"][0]["properties"]["version"]["oneOf"] = supported_versions_block - dump(index, destination_path / index_name) - - # Update the unstable schema (for tests and internal use). 
- unstable_filename = "haystack-pipeline-unstable.schema.json" - unstable_schema = deepcopy(new_schema) - unstable_schema["$id"] = f"{SCHEMA_URL}{unstable_filename}" - unstable_schema["properties"]["version"]["oneOf"] = [{"const": "unstable"}] + unstable_versions_block - dump(unstable_schema, destination_path / unstable_filename) - logger.info(f"Unstable schema saved in {destination_path / unstable_filename}") + with open(destination_path / index_name, "w") as json_file: + json.dump(index, json_file, indent=2) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 3600208fb9..70963bbacf 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -136,7 +136,11 @@ def to_notebook_cell( @classmethod @abstractmethod def load_from_config( - cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True + cls, + pipeline_config: Dict, + pipeline_name: Optional[str] = None, + overwrite_with_env_variables: bool = True, + strict_version_check: bool = False, ): """ Load Pipeline from a config dict defining the individual components and how they're tied together to form @@ -182,6 +186,7 @@ def load_from_config( to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. + :param strict_version_check: whether to fail in case of a version mismatch (throws a warning otherwise) """ raise NotImplementedError("This is an abstract method. 
Use Pipeline or RayPipeline instead.") @@ -1066,7 +1071,13 @@ def draw(self, path: Path = Path("pipeline.png")): graphviz.draw(path) @classmethod - def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True): + def load_from_yaml( + cls, + path: Path, + pipeline_name: Optional[str] = None, + overwrite_with_env_variables: bool = True, + strict_version_check: bool = False, + ): """ Load Pipeline from a YAML file defining the individual components and how they're tied together to form a Pipeline. A single YAML can declare multiple Pipelines, in which case an explicit `pipeline_name` must @@ -1111,6 +1122,7 @@ def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwri to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. + :param strict_version_check: whether to fail in case of a version mismatch (throws a warning otherwise) """ pipeline_config = read_pipeline_config_from_yaml(path) @@ -1118,11 +1130,16 @@ def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwri pipeline_config=pipeline_config, pipeline_name=pipeline_name, overwrite_with_env_variables=overwrite_with_env_variables, + strict_version_check=strict_version_check, ) @classmethod def load_from_config( - cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True + cls, + pipeline_config: Dict, + pipeline_name: Optional[str] = None, + overwrite_with_env_variables: bool = True, + strict_version_check: bool = False, ): """ Load Pipeline from a config dict defining the individual components and how they're tied together to form @@ -1168,8 +1185,9 @@ def load_from_config( to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. 
Note that an `_` sign must be used to specify nested hierarchical properties. + :param strict_version_check: whether to fail in case of a version mismatch (throws a warning otherwise). """ - validate_config(pipeline_config) + validate_config(pipeline_config, strict_version_check=strict_version_check) pipeline_definition = get_pipeline_definition(pipeline_config=pipeline_config, pipeline_name=pipeline_name) component_definitions = get_component_definitions( @@ -1396,6 +1414,7 @@ def load_from_config( pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, + strict_version_check: bool = False, address: Optional[str] = None, **kwargs, ): @@ -1430,12 +1449,13 @@ def load_from_config( return pipeline @classmethod - def load_from_yaml( + def load_from_yaml( # type: ignore cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, address: Optional[str] = None, + strict_version_check: bool = False, **kwargs, ): """ diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index 6a4fe0f1c8..f34bfc0a15 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -144,58 +144,61 @@ def build_component_dependency_graph( return graph -def validate_yaml(path: Path): +def validate_yaml(path: Path, strict_version_check: bool = False): """ Validates the given YAML file using the autogenerated JSON schema. :param pipeline_config: the configuration to validate + :param strict_version_check: whether to fail in case of a version mismatch (throws a warning otherwise) :return: None if validation is successful :raise: `PipelineConfigError` in case of issues. 
""" pipeline_config = read_pipeline_config_from_yaml(path) - validate_config(pipeline_config=pipeline_config) + validate_config(pipeline_config=pipeline_config, strict_version_check=strict_version_check) logging.debug(f"'{path}' contains valid Haystack pipelines.") -def validate_config(pipeline_config: Dict) -> None: +def validate_config(pipeline_config: Dict, strict_version_check: bool = False) -> None: """ Validates the given configuration using the autogenerated JSON schema. :param pipeline_config: the configuration to validate + :param strict_version_check: whether to fail in case of a version mismatch (throws a warning otherwise) :return: None if validation is successful :raise: `PipelineConfigError` in case of issues. """ validate_config_strings(pipeline_config) - with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-unstable.schema.json", "r") as schema_file: + # Check for the version manually (to avoid validation errors) + pipeline_version = pipeline_config.get("version", None) + + if pipeline_version != __version__: + if strict_version_check: + raise PipelineConfigError( + f"Cannot load pipeline configuration of version {pipeline_version} " + f"in Haystack version {__version__}\n" + "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " + "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " + "and fix your configuration accordingly." + ) + logging.warning( + f"This pipeline is version {pipeline_version}, but you're using Haystack {__version__}\n" + "This might cause bugs and unexpected behaviors." + "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " + "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " + "and fix your configuration accordingly." 
+ ) + + with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-master.schema.json", "r") as schema_file: schema = json.load(schema_file) - compatible_versions = [version["const"].replace('"', "") for version in schema["properties"]["version"]["oneOf"]] - loaded_custom_nodes = [] + # Remove the version value from the schema to prevent validation errors on it - a version only have to be present. + del schema["properties"]["version"]["const"] + loaded_custom_nodes = [] while True: - try: Draft7Validator(schema).validate(instance=pipeline_config) - - if pipeline_config["version"] == "unstable": - logging.warning( - "You seem to be using the 'unstable' version of the schema to validate " - "your pipeline configuration.\n" - "This is NOT RECOMMENDED in production environments, as pipelines " - "might manage to load and then misbehave without warnings.\n" - f"Please pin your configurations to '{__version__}' to ensure stability." - ) - - elif pipeline_config["version"] not in compatible_versions: - raise PipelineConfigError( - f"Cannot load pipeline configuration of version {pipeline_config['version']} " - f"in Haystack version {__version__} " - f"(only versions {compatible_versions} are compatible with this Haystack release).\n" - "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " - "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " - "and fix your configuration accordingly." - ) break except ValidationError as validation: diff --git a/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml b/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml index 569f19dfad..ba4882666f 100644 --- a/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml +++ b/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml @@ -1,5 +1,5 @@ # Dummy pipeline, used when the CI needs to load the REST API to extract the OpenAPI specs. DO NOT USE. 
-version: 'unstable' +version: ignore components: - name: FileTypeClassifier diff --git a/rest_api/pipeline/pipelines.haystack-pipeline.yml b/rest_api/pipeline/pipelines.haystack-pipeline.yml index b32a62c66c..a65fc0a7cc 100644 --- a/rest_api/pipeline/pipelines.haystack-pipeline.yml +++ b/rest_api/pipeline/pipelines.haystack-pipeline.yml @@ -1,6 +1,6 @@ # To allow your IDE to autocomplete and validate your YAML pipelines, name them as .haystack-pipeline.yml -version: 'unstable' +version: ignore components: # define all the building-blocks for Pipeline - name: DocumentStore diff --git a/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml b/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml index 72cdf0f63f..13395f691f 100644 --- a/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml +++ b/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml @@ -1,6 +1,6 @@ # To allow your IDE to autocomplete and validate your YAML pipelines, name them as .haystack-pipeline.yml -version: 'unstable' +version: ignore components: # define all the building-blocks for Pipeline - name: DocumentStore diff --git a/test/samples/dc/pipeline_config.json b/test/samples/dc/pipeline_config.json index 5594f4a0a5..b197a497ce 100644 --- a/test/samples/dc/pipeline_config.json +++ b/test/samples/dc/pipeline_config.json @@ -1,5 +1,5 @@ { - "version": "unstable", + "version": "master", "name": "document_retrieval_1", "components": [ { diff --git a/test/samples/pipeline/test_pipeline.yaml b/test/samples/pipeline/test_pipeline.yaml index b1306ce604..4dea24273f 100644 --- a/test/samples/pipeline/test_pipeline.yaml +++ b/test/samples/pipeline/test_pipeline.yaml @@ -1,4 +1,4 @@ -version: 'unstable' +version: ignore components: - name: Reader diff --git a/test/samples/pipeline/test_pipeline_faiss_indexing.yaml b/test/samples/pipeline/test_pipeline_faiss_indexing.yaml index 9fb2a254f9..db2f83a0db 100644 --- a/test/samples/pipeline/test_pipeline_faiss_indexing.yaml +++ 
b/test/samples/pipeline/test_pipeline_faiss_indexing.yaml @@ -1,4 +1,4 @@ -version: 'unstable' +version: ignore components: - name: DPRRetriever diff --git a/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml b/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml index 89a5cbf48c..462826b923 100644 --- a/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml +++ b/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml @@ -1,4 +1,4 @@ -version: 'unstable' +version: ignore components: - name: DPRRetriever diff --git a/test/samples/pipeline/test_pipeline_tfidfretriever.yaml b/test/samples/pipeline/test_pipeline_tfidfretriever.yaml index b954d42827..3bc0e1103e 100644 --- a/test/samples/pipeline/test_pipeline_tfidfretriever.yaml +++ b/test/samples/pipeline/test_pipeline_tfidfretriever.yaml @@ -1,4 +1,4 @@ -version: 'unstable' +version: ignore components: - name: Reader diff --git a/test/samples/pipeline/test_ray_pipeline.yaml b/test/samples/pipeline/test_ray_pipeline.yaml index 3ec3864b04..95b480fdbb 100644 --- a/test/samples/pipeline/test_ray_pipeline.yaml +++ b/test/samples/pipeline/test_ray_pipeline.yaml @@ -1,4 +1,4 @@ -version: 'unstable' +version: ignore components: - name: Reader diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 28f7d2dd38..81b0bba8b6 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -1,3 +1,4 @@ +from copy import deepcopy from pathlib import Path import os @@ -383,7 +384,7 @@ def __init__(self, param: int = 1): def test_generate_code_simple_pipeline(): config = { - "version": "unstable", + "version": "master", "components": [ { "name": "retri", @@ -411,7 +412,7 @@ def test_generate_code_simple_pipeline(): def test_generate_code_imports(): pipeline_config = { - "version": "unstable", + "version": "master", "components": [ {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"}, {"name": "retri", "type": "ElasticsearchRetriever", "params": {"document_store": "DocumentStore"}}, @@ -443,7 +444,7 
@@ def test_generate_code_imports(): def test_generate_code_imports_no_pipeline_cls(): pipeline_config = { - "version": "unstable", + "version": "master", "components": [ {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"}, {"name": "retri", "type": "ElasticsearchRetriever", "params": {"document_store": "DocumentStore"}}, @@ -471,7 +472,7 @@ def test_generate_code_imports_no_pipeline_cls(): def test_generate_code_comment(): pipeline_config = { - "version": "unstable", + "version": "master", "components": [ {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"}, {"name": "retri", "type": "ElasticsearchRetriever", "params": {"document_store": "DocumentStore"}}, @@ -498,7 +499,7 @@ def test_generate_code_comment(): def test_generate_code_is_component_order_invariant(): pipeline_config = { - "version": "unstable", + "version": "master", "pipelines": [ { "name": "Query", @@ -547,13 +548,14 @@ def test_generate_code_is_component_order_invariant(): for components in component_orders: pipeline_config["components"] = components + code = generate_code(pipeline_config=pipeline_config, pipeline_variable_name="p", generate_imports=False) assert code == expected_code def test_generate_code_can_handle_weak_cyclic_pipelines(): config = { - "version": "unstable", + "version": "master", "components": [ {"name": "parent", "type": "ParentComponent", "params": {"dependent": "child"}}, {"name": "child", "type": "ChildComponent", "params": {}}, diff --git a/test/test_pipeline_yaml.py b/test/test_pipeline_yaml.py index 6f1bfb86b9..29b4936fcb 100644 --- a/test/test_pipeline_yaml.py +++ b/test/test_pipeline_yaml.py @@ -1,4 +1,5 @@ from abc import abstractmethod +import logging from numpy import mat import pytest import json @@ -27,7 +28,7 @@ @pytest.fixture(autouse=True) def mock_json_schema(request, monkeypatch, tmp_path): """ - JSON schema with the unstable version and only mocked nodes. + JSON schema with the master version and only mocked nodes. 
""" # Do not patch integration tests if "integration" in request.keywords: @@ -43,10 +44,8 @@ def mock_json_schema(request, monkeypatch, tmp_path): monkeypatch.setattr(haystack.pipelines.config, "JSON_SCHEMAS_PATH", tmp_path) # Generate mock schema in tmp_path - filename = f"haystack-pipeline-unstable.schema.json" - test_schema = _json_schema.get_json_schema( - filename=filename, compatible_versions=["unstable", haystack.__version__] - ) + filename = f"haystack-pipeline-master.schema.json" + test_schema = _json_schema.get_json_schema(filename=filename, version="ignore") with open(tmp_path / filename, "w") as schema_file: json.dump(test_schema, schema_file, indent=4) @@ -118,7 +117,7 @@ def test_load_yaml(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: retriever type: MockRetriever @@ -168,12 +167,12 @@ def test_load_yaml_missing_version(tmp_path): - Query """ ) - with pytest.raises(PipelineConfigError) as e: + with pytest.raises(PipelineConfigError, match="Validation failed") as e: Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml") assert "version" in str(e) -def test_load_yaml_non_existing_version(tmp_path): +def test_load_yaml_non_existing_version(tmp_path, caplog): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( """ @@ -189,12 +188,33 @@ def test_load_yaml_non_existing_version(tmp_path): - Query """ ) - with pytest.raises(PipelineConfigError) as e: + with caplog.at_level(logging.WARNING): Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml") - assert "version" in str(e) and "random" in str(e) + assert "version random" in caplog.text + assert f"Haystack {haystack.__version__}" in caplog.text + + +def test_load_yaml_non_existing_version_strict(tmp_path): + with open(tmp_path / "tmp_config.yml", "w") as tmp_file: + tmp_file.write( + """ + version: random + components: + - name: docstore + type: MockDocumentStore + pipelines: + - 
name: my_pipeline + nodes: + - name: docstore + inputs: + - Query + """ + ) + with pytest.raises(PipelineConfigError, match="Cannot load pipeline configuration of version random"): + Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml", strict_version_check=True) -def test_load_yaml_incompatible_version(tmp_path): +def test_load_yaml_incompatible_version(tmp_path, caplog): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( """ @@ -210,16 +230,37 @@ def test_load_yaml_incompatible_version(tmp_path): - Query """ ) - with pytest.raises(PipelineConfigError) as e: + with caplog.at_level(logging.WARNING): Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml") - assert "version" in str(e) and "1.1.0" in str(e) + assert "version 1.1.0" in caplog.text + assert f"Haystack {haystack.__version__}" in caplog.text + + +def test_load_yaml_incompatible_version_strict(tmp_path): + with open(tmp_path / "tmp_config.yml", "w") as tmp_file: + tmp_file.write( + """ + version: 1.1.0 + components: + - name: docstore + type: MockDocumentStore + pipelines: + - name: my_pipeline + nodes: + - name: docstore + inputs: + - Query + """ + ) + with pytest.raises(PipelineConfigError, match="Cannot load pipeline configuration of version 1.1.0"): + Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml", strict_version_check=True) def test_load_yaml_no_components(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: pipelines: - name: my_pipeline @@ -235,7 +276,7 @@ def test_load_yaml_wrong_component(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: docstore type: ImaginaryDocumentStore @@ -261,7 +302,7 @@ def __init__(self, param: int): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: custom_node type: 
CustomNode @@ -347,7 +388,7 @@ def abstract_method(self): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: custom_node type: CustomNode @@ -371,7 +412,7 @@ def __init__(self): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: custom_node type: BaseCustomNode @@ -395,7 +436,7 @@ def run(self, *a, **k): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: custom_node type: SomeCustomNode @@ -427,7 +468,7 @@ def __init__(self, other_node: OtherNode): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: other_node type: OtherNode @@ -471,7 +512,7 @@ def __init__(self, some_exotic_parameter: HelperClass = HelperClass(1)): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: custom_node type: CustomNode @@ -508,7 +549,7 @@ def __init__(self, some_exotic_parameter: HelperClass): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: custom_node type: CustomNode @@ -547,7 +588,7 @@ def __init__(self, some_exotic_parameter: Flags = None): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: custom_node type: CustomNode @@ -584,7 +625,7 @@ def __init__(self, some_exotic_parameter: Flags): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: custom_node type: CustomNode @@ -618,7 +659,7 @@ def __init__(self, some_exotic_parameter: str): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - 
version: unstable + version: ignore components: - name: custom_node type: CustomNode @@ -650,7 +691,7 @@ def __init__(self, some_exotic_parameter: str): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: custom_node type: CustomNode @@ -737,7 +778,7 @@ def test_load_yaml_no_pipelines(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: docstore type: MockDocumentStore @@ -753,7 +794,7 @@ def test_load_yaml_invalid_pipeline_name(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: docstore type: MockDocumentStore @@ -774,7 +815,7 @@ def test_load_yaml_pipeline_with_wrong_nodes(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: docstore type: MockDocumentStore @@ -795,7 +836,7 @@ def test_load_yaml_pipeline_not_acyclic_graph(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: retriever type: MockRetriever @@ -822,7 +863,7 @@ def test_load_yaml_wrong_root(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: retriever type: MockRetriever @@ -844,7 +885,7 @@ def test_load_yaml_two_roots(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: retriever type: MockRetriever @@ -870,7 +911,7 @@ def test_load_yaml_disconnected_component(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: ignore components: - name: docstore type: MockDocumentStore