From 393207fd5a12b373c7ccad5be022c1400fc9608d Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 1 Apr 2022 17:57:24 +0200 Subject: [PATCH 01/22] Change exception into warning, add strict_version param, and remove compatibility between schemas --- .../haystack-pipeline-1.0.0.schema.json | 12 +- .../haystack-pipeline-1.1.0.schema.json | 3648 +++++++++++++++++ .../haystack-pipeline-1.2.0.schema.json | 3648 +++++++++++++++++ ...on => haystack-pipeline-1.3.0.schema.json} | 12 +- ...n => haystack-pipeline-master.schema.json} | 17 +- .../haystack-pipeline.schema.json | 64 +- haystack/pipelines/config.py | 46 +- 7 files changed, 7361 insertions(+), 86 deletions(-) create mode 100644 haystack/json-schemas/haystack-pipeline-1.1.0.schema.json create mode 100644 haystack/json-schemas/haystack-pipeline-1.2.0.schema.json rename haystack/json-schemas/{haystack-pipeline-1.2.1rc0.schema.json => haystack-pipeline-1.3.0.schema.json} (99%) rename haystack/json-schemas/{haystack-pipeline-unstable.schema.json => haystack-pipeline-master.schema.json} (99%) diff --git a/haystack/json-schemas/haystack-pipeline-1.0.0.schema.json b/haystack/json-schemas/haystack-pipeline-1.0.0.schema.json index 6524ed657c..6362d915a6 100644 --- a/haystack/json-schemas/haystack-pipeline-1.0.0.schema.json +++ b/haystack/json-schemas/haystack-pipeline-1.0.0.schema.json @@ -9,17 +9,7 @@ "title": "Version", "description": "Version of the Haystack Pipeline file.", "type": "string", - "oneOf": [ - { - "const": "1.0.0" - }, - { - "const": "1.1.0" - }, - { - "const": "1.2.0" - } - ] + "const": "1.0.0" }, "components": { "title": "Components", diff --git a/haystack/json-schemas/haystack-pipeline-1.1.0.schema.json b/haystack/json-schemas/haystack-pipeline-1.1.0.schema.json new file mode 100644 index 0000000000..af50926314 --- /dev/null +++ b/haystack/json-schemas/haystack-pipeline-1.1.0.schema.json @@ -0,0 +1,3648 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": 
"https://haystack.deepset.ai/json-schemas/haystack-pipeline-1.1.0.schema.json", + "title": "Haystack Pipeline", + "description": "Haystack Pipeline YAML file describing the nodes of the pipelines. For more info read the docs at: https://haystack.deepset.ai/components/pipelines#yaml-file-definitions", + "type": "object", + "properties": { + "version": { + "title": "Version", + "description": "Version of the Haystack Pipeline file.", + "type": "string", + "const": "1.1.0" + }, + "components": { + "title": "Components", + "description": "Component nodes and their configurations, to later be used in the pipelines section. Define here all the building blocks for the pipelines.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/DeepsetCloudDocumentStoreComponent" + }, + { + "$ref": "#/definitions/ElasticsearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/FAISSDocumentStoreComponent" + }, + { + "$ref": "#/definitions/GraphDBKnowledgeGraphComponent" + }, + { + "$ref": "#/definitions/InMemoryDocumentStoreComponent" + }, + { + "$ref": "#/definitions/Milvus2DocumentStoreComponent" + }, + { + "$ref": "#/definitions/OpenDistroElasticsearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/OpenSearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/SQLDocumentStoreComponent" + }, + { + "$ref": "#/definitions/WeaviateDocumentStoreComponent" + }, + { + "$ref": "#/definitions/AzureConverterComponent" + }, + { + "$ref": "#/definitions/CrawlerComponent" + }, + { + "$ref": "#/definitions/DensePassageRetrieverComponent" + }, + { + "$ref": "#/definitions/Docs2AnswersComponent" + }, + { + "$ref": "#/definitions/DocxToTextConverterComponent" + }, + { + "$ref": "#/definitions/ElasticsearchFilterOnlyRetrieverComponent" + }, + { + "$ref": "#/definitions/ElasticsearchRetrieverComponent" + }, + { + "$ref": "#/definitions/EmbeddingRetrieverComponent" + }, + { + "$ref": "#/definitions/EntityExtractorComponent" + }, + { + "$ref": 
"#/definitions/EvalAnswersComponent" + }, + { + "$ref": "#/definitions/EvalDocumentsComponent" + }, + { + "$ref": "#/definitions/FARMReaderComponent" + }, + { + "$ref": "#/definitions/FileTypeClassifierComponent" + }, + { + "$ref": "#/definitions/ImageToTextConverterComponent" + }, + { + "$ref": "#/definitions/JoinAnswersComponent" + }, + { + "$ref": "#/definitions/JoinDocumentsComponent" + }, + { + "$ref": "#/definitions/MarkdownConverterComponent" + }, + { + "$ref": "#/definitions/PDFToTextConverterComponent" + }, + { + "$ref": "#/definitions/PDFToTextOCRConverterComponent" + }, + { + "$ref": "#/definitions/ParsrConverterComponent" + }, + { + "$ref": "#/definitions/PreProcessorComponent" + }, + { + "$ref": "#/definitions/QuestionGeneratorComponent" + }, + { + "$ref": "#/definitions/RAGeneratorComponent" + }, + { + "$ref": "#/definitions/RCIReaderComponent" + }, + { + "$ref": "#/definitions/RouteDocumentsComponent" + }, + { + "$ref": "#/definitions/SentenceTransformersRankerComponent" + }, + { + "$ref": "#/definitions/Seq2SeqGeneratorComponent" + }, + { + "$ref": "#/definitions/SklearnQueryClassifierComponent" + }, + { + "$ref": "#/definitions/TableReaderComponent" + }, + { + "$ref": "#/definitions/TableTextRetrieverComponent" + }, + { + "$ref": "#/definitions/Text2SparqlRetrieverComponent" + }, + { + "$ref": "#/definitions/TextConverterComponent" + }, + { + "$ref": "#/definitions/TfidfRetrieverComponent" + }, + { + "$ref": "#/definitions/TikaConverterComponent" + }, + { + "$ref": "#/definitions/TransformersDocumentClassifierComponent" + }, + { + "$ref": "#/definitions/TransformersQueryClassifierComponent" + }, + { + "$ref": "#/definitions/TransformersReaderComponent" + }, + { + "$ref": "#/definitions/TransformersSummarizerComponent" + }, + { + "$ref": "#/definitions/TransformersTranslatorComponent" + } + ] + }, + "required": [ + "type", + "name" + ], + "additionalProperties": true + }, + "pipelines": { + "title": "Pipelines", + "description": "Multiple pipelines 
can be defined using the components from the same YAML file.", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Name of the pipeline.", + "type": "string" + }, + "nodes": { + "title": "Nodes", + "description": "Nodes to be used by this particular pipeline", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The name of this particular node in the pipeline. This should be one of the names from the components defined in the same file.", + "type": "string" + }, + "inputs": { + "title": "Inputs", + "description": "Input parameters for this node.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "name", + "inputs" + ], + "additionalProperties": false + }, + "required": [ + "name", + "nodes" + ], + "additionalProperties": false + }, + "additionalProperties": false + }, + "additionalProperties": false + } + } + }, + "required": [ + "version", + "components", + "pipelines" + ], + "additionalProperties": false, + "definitions": { + "DeepsetCloudDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DeepsetCloudDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "api_key": { + "title": "Api Key", + "type": "string" + }, + "workspace": { + "title": "Workspace", + "default": "default", + "type": "string" + }, + "index": { + "title": "Index", + "default": "default", + "type": "string" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "api_endpoint": { + "title": "Api Endpoint", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "localhost", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "port": { + "title": "Port", + "default": 9200, + "anyOf": [ + { + "type": "integer" + }, + { + "type": "array", + "items": { + "type": "integer" + } + } + ] + }, + "username": { + "title": "Username", + "default": "", + "type": "string" + }, + "password": { + "title": "Password", + "default": "", + "type": "string" + }, + "api_key_id": { + "title": "Api Key Id", + "type": "string" + }, + "api_key": { + "title": "Api Key", + "type": "string" + }, + "aws4auth": { + "title": "Aws4Auth" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "search_fields": { + "title": "Search Fields", + "default": "content", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": {} + } + ] + }, + "content_field": { + "title": "Content Field", + "default": "content", + "type": "string" + }, + "name_field": { + "title": "Name Field", + "default": "name", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "custom_mapping": { + "title": "Custom Mapping", + "type": "object" + }, + "excluded_meta_data": { + "title": "Excluded Meta Data", + "type": "array", + "items": {} + }, + "analyzer": { + "title": "Analyzer", + "default": "standard", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "http", + 
"type": "string" + }, + "ca_certs": { + "title": "Ca Certs", + "type": "string" + }, + "verify_certs": { + "title": "Verify Certs", + "default": true, + "type": "boolean" + }, + "recreate_index": { + "title": "Recreate Index", + "default": false, + "type": "boolean" + }, + "create_index": { + "title": "Create Index", + "default": true, + "type": "boolean" + }, + "refresh_type": { + "title": "Refresh Type", + "default": "wait_for", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product" + }, + "timeout": { + "title": "Timeout", + "default": 30 + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "flat", + "type": "string" + }, + "scroll": { + "title": "Scroll", + "default": "1d", + "type": "string" + }, + "skip_missing_embeddings": { + "title": "Skip Missing Embeddings", + "default": true, + "type": "boolean" + }, + "synonyms": { + "title": "Synonyms", + "type": "array", + "items": {} + }, + "synonym_type": { + "title": "Synonym Type", + "default": "synonym", + "type": "string" + }, + "use_system_proxy": { + "title": "Use System Proxy", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FAISSDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FAISSDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "sql_url": { + "title": "Sql Url", + "default": "sqlite:///faiss_document_store.db", + "type": "string" + }, + "vector_dim": { + "title": "Vector Dim", + "type": "integer" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "faiss_index_factory_str": { + "title": "Faiss Index Factory Str", + "default": "Flat", + "type": "string" + }, + "faiss_index": { + "title": "Faiss Index", + "type": "string", + "default": null + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "faiss_index_path": { + "title": "Faiss Index Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "faiss_config_path": { + "title": "Faiss Config Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "GraphDBKnowledgeGraphComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "GraphDBKnowledgeGraph" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "localhost", + "type": "string" + }, + "port": { + "title": "Port", + "default": 7200, + "type": "integer" + }, + "username": { + "title": "Username", + "default": "", + "type": "string" + }, + "password": { + "title": "Password", + "default": "", + "type": "string" + }, + "index": { + "title": "Index", + "type": "string" + }, + "prefixes": { + "title": "Prefixes", + "default": "", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "InMemoryDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "InMemoryDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "scoring_batch_size": { + "title": "Scoring Batch Size", + "default": 500000, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Milvus2DocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Milvus2DocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "sql_url": { + "title": "Sql Url", + "default": "sqlite:///", + "type": "string" + }, + "host": { + "title": "Host", + "default": "localhost", + "type": "string" + }, + "port": { + "title": "Port", + "default": "19530", + "type": "string" + }, + "connection_pool": { + "title": "Connection Pool", + "default": "SingletonThread", + "type": "string" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "vector_dim": { + "title": "Vector Dim", + "type": "integer" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "index_file_size": { + "title": "Index File Size", + "default": 1024, + "type": "integer" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "IVF_FLAT", + "type": "string" + }, + "index_param": { + "title": "Index Param", + "type": "object" + }, + "search_param": { + "title": "Search Param", + "type": "object" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "id_field": { + "title": "Id Field", + "default": "id", + "type": "string" + }, + "custom_fields": { + "title": "Custom Fields", + "type": "array", + "items": {} + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + }, + "consistency_level": { + 
"title": "Consistency Level", + "default": 0, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "OpenDistroElasticsearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "OpenDistroElasticsearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "https://admin:admin@localhost:9200/" + }, + "similarity": { + "title": "Similarity", + "default": "cosine" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "OpenSearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "OpenSearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "verify_certs": { + "title": "Verify Certs", + "default": false + }, + "scheme": { + "title": "Scheme", + "default": "https" + }, + "username": { + "title": "Username", + "default": "admin" + }, + "password": { + "title": "Password", + "default": "admin" + }, + "port": { + "title": "Port", + "default": 9200 + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SQLDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SQLDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "url": { + "title": "Url", + "default": "sqlite://", + "type": "string" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "check_same_thread": { + "title": "Check Same Thread", + "default": false, + "type": "boolean" + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "WeaviateDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "WeaviateDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "http://localhost", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "port": { + "title": "Port", + "default": 8080, + "anyOf": [ + { + "type": "integer" + }, + { + "type": "array", + "items": { + "type": "integer" + } + } + ] + }, + "timeout_config": { + "title": "Timeout Config", + "default": [ + 5, + 15 + ], + "type": "array", + "items": {} + }, + "username": { + "title": "Username", + "type": "string" + }, + "password": { + "title": "Password", + "type": "string" + }, + "index": { + "title": "Index", + "default": "Document", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "content_field": { + "title": "Content Field", + "default": "content", + "type": "string" + }, + "name_field": { + "title": "Name Field", + "default": "name", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "cosine", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "hnsw", + "type": "string" + }, + "custom_schema": { + "title": "Custom Schema", + "type": "object" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": 
true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "AzureConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "AzureConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "endpoint": { + "title": "Endpoint", + "type": "string" + }, + "credential_key": { + "title": "Credential Key", + "type": "string" + }, + "model_id": { + "title": "Model Id", + "default": "prebuilt-document", + "type": "string" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + }, + "save_json": { + "title": "Save Json", + "default": false, + "type": "boolean" + }, + "preceding_context_len": { + "title": "Preceding Context Len", + "default": 3, + "type": "integer" + }, + "following_context_len": { + "title": "Following Context Len", + "default": 3, + "type": "integer" + }, + "merge_multiple_column_headers": { + "title": "Merge Multiple Column Headers", + "default": true, + "type": "boolean" + } + }, + "required": [ + "endpoint", + "credential_key" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "CrawlerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Crawler" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "output_dir": { + "title": "Output Dir", + "type": "string" + }, + "urls": { + "title": "Urls", + "type": "array", + "items": { + "type": "string" + } + }, + "crawler_depth": { + "title": "Crawler Depth", + "default": 1, + "type": "integer" + }, + "filter_urls": { + "title": "Filter Urls", + "type": "array", + "items": {} + }, + "overwrite_existing_files": { + "title": "Overwrite Existing Files", + "default": true + } + }, + "required": [ + "output_dir" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "DensePassageRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DensePassageRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "query_embedding_model": { + "title": "Query Embedding Model", + "default": "facebook/dpr-question_encoder-single-nq-base", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "passage_embedding_model": { + "title": "Passage Embedding Model", + "default": "facebook/dpr-ctx_encoder-single-nq-base", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "max_seq_len_query": { + "title": "Max Seq Len Query", + "default": 64, + "type": "integer" + }, + "max_seq_len_passage": { + "title": "Max Seq Len Passage", + "default": 256, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 16, + "type": "integer" + }, + "embed_title": { + "title": "Embed Title", + "default": true, + "type": "boolean" + }, + "use_fast_tokenizers": { + "title": "Use Fast Tokenizers", + "default": true, + "type": "boolean" + }, + "infer_tokenizer_classes": { + "title": "Infer Tokenizer Classes", + "default": false, + "type": "boolean" + }, + "similarity_function": { + "title": "Similarity Function", + "default": "dot_product", + "type": "string" + }, + "global_loss_buffer_size": { + "title": "Global Loss Buffer Size", + "default": 150000, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + 
"type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Docs2AnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Docs2Answers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": {}, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "DocxToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DocxToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchFilterOnlyRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchFilterOnlyRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "custom_query": { + "title": "Custom Query", + "type": "string" + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "custom_query": { + "title": "Custom Query", + "type": "string" + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EmbeddingRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EmbeddingRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "embedding_model": { + "title": "Embedding Model", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 32, + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 512, + "type": "integer" + }, + "model_format": { + "title": "Model Format", + "default": "farm", + "type": "string" + }, + "pooling_strategy": { + "title": "Pooling Strategy", + "default": "reduce_mean", + "type": "string" + }, + "emb_extraction_layer": { + "title": "Emb Extraction Layer", + "default": -1, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "document_store", + "embedding_model" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EntityExtractorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EntityExtractor" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "dslim/bert-base-NER", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EvalAnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EvalAnswers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "skip_incorrect_retrieval": { + "title": "Skip Incorrect Retrieval", + "default": true, + "type": "boolean" + }, + "open_domain": { + "title": "Open Domain", + "default": true, + "type": "boolean" + }, + "sas_model": { + "title": "Sas Model", + "type": "string" + }, + "debug": { + "title": "Debug", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EvalDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EvalDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "debug": { + "title": "Debug", + "default": false, + "type": "boolean" + }, + "open_domain": { + "title": "Open Domain", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FARMReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FARMReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "context_window_size": { + "title": "Context Window Size", + "default": 150, + "type": "integer" + }, + "batch_size": { + "title": "Batch Size", + "default": 50, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "no_ans_boost": { + "title": "No Ans Boost", + "default": 0.0, + "type": "number" + }, + "return_no_answer": { + "title": "Return No Answer", + "default": false, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 3, + "type": "integer" + }, + "top_k_per_sample": { + "title": "Top K Per Sample", + "default": 1, + "type": "integer" + }, + "num_processes": { + "title": "Num Processes", + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + }, + "doc_stride": { + "title": "Doc Stride", + "default": 128, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_filtering": { + "title": "Duplicate Filtering", + "default": 0, + "type": "integer" + }, + "use_confidence_scores": { + "title": "Use Confidence Scores", + "default": true, + "type": "boolean" + }, + "proxies": { + "title": "Proxies", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "local_files_only": { + "title": "Local Files Only", + "default": false + }, + "force_download": { + "title": "Force Download", + "default": false + }, + 
"use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FileTypeClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FileTypeClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "supported_types": { + "title": "Supported Types", + "default": [ + "txt", + "pdf", + "md", + "docx", + "html" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ImageToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ImageToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "default": [ + "eng" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "JoinAnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "JoinAnswers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "join_mode": { + "title": "Join Mode", + "default": "concatenate", + "type": "string" + }, + "weights": { + "title": "Weights", + "type": "array", + "items": { + "type": "number" + } + }, + "top_k_join": { + "title": "Top K Join", + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "JoinDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "JoinDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "join_mode": { + "title": "Join Mode", + "default": "concatenate", + "type": "string" + }, + "weights": { + "title": "Weights", + "type": "array", + "items": { + "type": "number" + } + }, + "top_k_join": { + "title": "Top K Join", + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "MarkdownConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "MarkdownConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PDFToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PDFToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PDFToTextOCRConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PDFToTextOCRConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "default": [ + "eng" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ParsrConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ParsrConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "parsr_url": { + "title": "Parsr Url", + "default": "http://localhost:3001", + "type": "string" + }, + "extractor": { + "title": "Extractor", + "default": "pdfminer", + "enum": [ + "pdfminer", + "pdfjs" + ], + "type": "string" + }, + "table_detection_mode": { + "title": "Table Detection Mode", + "default": "lattice", + "enum": [ + "lattice", + "stream" + ], + "type": "string" + }, + "preceding_context_len": { + "title": "Preceding Context Len", + "default": 3, + "type": "integer" + }, + "following_context_len": { + "title": "Following Context Len", + "default": 3, + "type": "integer" + }, + "remove_page_headers": { + "title": "Remove Page Headers", + "default": false, + "type": "boolean" + }, + "remove_page_footers": { + "title": "Remove Page Footers", + "default": false, + "type": "boolean" + }, + "remove_table_of_contents": { + "title": "Remove Table Of Contents", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PreProcessorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PreProcessor" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "clean_whitespace": { + "title": "Clean Whitespace", + "default": true, + "type": "boolean" + }, + "clean_header_footer": { + "title": "Clean Header Footer", + "default": false, + "type": "boolean" + }, + "clean_empty_lines": { + "title": "Clean Empty Lines", + "default": true, + "type": "boolean" + }, + "remove_substrings": { + "title": "Remove Substrings", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "split_by": { + "title": "Split By", + "default": "word", + "type": "string" + }, + "split_length": { + "title": "Split Length", + "default": 200, + "type": "integer" + }, + "split_overlap": { + "title": "Split Overlap", + "default": 0, + "type": "integer" + }, + "split_respect_sentence_boundary": { + "title": "Split Respect Sentence Boundary", + "default": true, + "type": "boolean" + }, + "language": { + "title": "Language", + "default": "en", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "QuestionGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "QuestionGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "valhalla/t5-base-e2e-qg" + }, + "model_version": { + "title": "Model Version" + }, + "num_beams": { + "title": "Num Beams", + "default": 4 + }, + "max_length": { + "title": "Max Length", + "default": 256 + }, + "no_repeat_ngram_size": { + "title": "No Repeat Ngram Size", + "default": 3 + }, + "length_penalty": { + "title": "Length Penalty", + "default": 1.5 + }, + "early_stopping": { + "title": "Early Stopping", + "default": true + }, + "split_length": { + "title": "Split Length", + "default": 50 + }, + "split_overlap": { + "title": "Split Overlap", + "default": 10 + }, + "use_gpu": { + "title": "Use Gpu", + "default": true + }, + "prompt": { + "title": "Prompt", + "default": "generate questions:" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RAGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RAGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "facebook/rag-token-nq", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "retriever": { + "title": "Retriever", + "type": "string", + "default": null + }, + "generator_type": { + "default": [ + 1 + ], + "allOf": [ + { + "$ref": "#/definitions/RAGeneratorType" + } + ] + }, + "top_k": { + "title": "Top K", + "default": 2, + "type": "integer" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 2, + "type": "integer" + }, + "num_beams": { + "title": "Num Beams", + "default": 2, + "type": "integer" + }, + "embed_title": { + "title": "Embed Title", + "default": true, + "type": "boolean" + }, + "prefix": { + "title": "Prefix", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RAGeneratorType": { + "title": "RAGeneratorType", + "description": "An enumeration.", + "enum": [ + [ + 1 + ], + 2 + ] + }, + "RCIReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RCIReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "row_model_name_or_path": { + "title": "Row Model Name Or Path", + "default": "michaelrglass/albert-base-rci-wikisql-row", + "type": "string" + }, + "column_model_name_or_path": { + "title": "Column Model Name Or Path", + "default": "michaelrglass/albert-base-rci-wikisql-col", + "type": "string" + }, + "row_model_version": { + "title": "Row Model Version", + "type": "string" + }, + "column_model_version": { + "title": "Column Model Version", + "type": "string" + }, + "row_tokenizer": { + "title": "Row Tokenizer", + "type": "string" + }, + "column_tokenizer": { + "title": "Column Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RouteDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RouteDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "split_by": { + "title": "Split By", + "default": "content_type", + "type": "string" + }, + "metadata_values": { + "title": "Metadata Values", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SentenceTransformersRankerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SentenceTransformersRanker" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Seq2SeqGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Seq2SeqGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "input_converter": { + "title": "Input Converter", + "type": "string", + "default": null + }, + "top_k": { + "title": "Top K", + "default": 1, + "type": "integer" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 2, + "type": "integer" + }, + "num_beams": { + "title": "Num Beams", + "default": 8, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SklearnQueryClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SklearnQueryClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "https://ext-models-haystack.s3.eu-central-1.amazonaws.com/gradboost_query_classifier/model.pickle", + "anyOf": [ + { + "type": "string" + }, + {} + ] + }, + "vectorizer_name_or_path": { + "title": "Vectorizer Name Or Path", + "default": "https://ext-models-haystack.s3.eu-central-1.amazonaws.com/gradboost_query_classifier/vectorizer.pickle", + "anyOf": [ + { + "type": "string" + }, + {} + ] + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TableReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TableReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "google/tapas-base-finetuned-wtq", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 3, + "type": "integer" + }, + "return_no_answer": { + "title": "Return No Answer", + "default": false, + "type": "boolean" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TableTextRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TableTextRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "query_embedding_model": { + "title": "Query Embedding Model", + "default": "deepset/bert-small-mm_retrieval-question_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "passage_embedding_model": { + "title": "Passage Embedding Model", + "default": "deepset/bert-small-mm_retrieval-passage_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "table_embedding_model": { + "title": "Table Embedding Model", + "default": "deepset/bert-small-mm_retrieval-table_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "max_seq_len_query": { + "title": "Max Seq Len Query", + "default": 64, + "type": "integer" + }, + "max_seq_len_passage": { + "title": "Max Seq Len Passage", + "default": 256, + "type": "integer" + }, + "max_seq_len_table": { + "title": "Max Seq Len Table", + "default": 256, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 16, + "type": "integer" + }, + "embed_meta_fields": { + "title": "Embed Meta Fields", + "default": [ + "name", + "section_title", + "caption" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "use_fast_tokenizers": { + "title": "Use Fast Tokenizers", + "default": true, + "type": "boolean" + }, + "infer_tokenizer_classes": { + "title": "Infer 
Tokenizer Classes", + "default": false, + "type": "boolean" + }, + "similarity_function": { + "title": "Similarity Function", + "default": "dot_product", + "type": "string" + }, + "global_loss_buffer_size": { + "title": "Global Loss Buffer Size", + "default": 150000, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Text2SparqlRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Text2SparqlRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "knowledge_graph": { + "title": "Knowledge Graph" + }, + "model_name_or_path": { + "title": "Model Name Or Path" + }, + "top_k": { + "title": "Top K", + "default": 1, + "type": "integer" + } + }, + "required": [ + "knowledge_graph", + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TfidfRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TfidfRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "auto_fit": { + "title": "Auto Fit", + "default": true + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TikaConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TikaConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "tika_url": { + "title": "Tika Url", + "default": "http://localhost:9998/tika", + "type": "string" + }, + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersDocumentClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersDocumentClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "bhadresh-savani/distilbert-base-uncased-emotion", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "return_all_scores": { + "title": "Return All Scores", + "default": false, + "type": "boolean" + }, + "task": { + "title": "Task", + "default": "text-classification", + "type": "string" + }, + "labels": { + "title": "Labels", + "type": "array", + "items": { + "type": "string" + } + }, + "batch_size": { + "title": "Batch Size", + "default": -1, + "type": "integer" + }, + "classification_field": { + "title": "Classification Field", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersQueryClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersQueryClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "shahrukhx01/bert-mini-finetune-question-detection", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "distilbert-base-uncased-distilled-squad", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "context_window_size": { + "title": "Context Window Size", + "default": 70, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 4, + "type": "integer" + }, + "return_no_answers": { + "title": "Return No Answers", + "default": true, + "type": "boolean" 
+ }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + }, + "doc_stride": { + "title": "Doc Stride", + "default": 128, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersSummarizerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersSummarizer" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "google/pegasus-xsum", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 5, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "clean_up_tokenization_spaces": { + "title": "Clean Up Tokenization Spaces", + "default": true, + "type": "boolean" + }, + "separator_for_single_summary": { + "title": "Separator For Single Summary", + "default": " ", + "type": "string" + }, + "generate_single_summary": { + "title": "Generate Single Summary", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersTranslatorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersTranslator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "tokenizer_name": { + "title": "Tokenizer Name", + "type": "string" + }, + "max_seq_len": { + "title": "Max Seq Len", + "type": "integer" + }, + "clean_up_tokenization_spaces": { + "title": "Clean Up Tokenization Spaces", + "default": true, + "type": "boolean" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + } + } +} \ No newline at end of file diff --git a/haystack/json-schemas/haystack-pipeline-1.2.0.schema.json b/haystack/json-schemas/haystack-pipeline-1.2.0.schema.json new file mode 100644 index 0000000000..c6674beae9 --- /dev/null +++ b/haystack/json-schemas/haystack-pipeline-1.2.0.schema.json @@ -0,0 +1,3648 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://haystack.deepset.ai/json-schemas/haystack-pipeline-1.2.0.schema.json", + "title": "Haystack Pipeline", + "description": "Haystack Pipeline YAML file describing the nodes of the pipelines. 
For more info read the docs at: https://haystack.deepset.ai/components/pipelines#yaml-file-definitions", + "type": "object", + "properties": { + "version": { + "title": "Version", + "description": "Version of the Haystack Pipeline file.", + "type": "string", + "const": "1.2.0" + }, + "components": { + "title": "Components", + "description": "Component nodes and their configurations, to later be used in the pipelines section. Define here all the building blocks for the pipelines.", + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/DeepsetCloudDocumentStoreComponent" + }, + { + "$ref": "#/definitions/ElasticsearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/FAISSDocumentStoreComponent" + }, + { + "$ref": "#/definitions/GraphDBKnowledgeGraphComponent" + }, + { + "$ref": "#/definitions/InMemoryDocumentStoreComponent" + }, + { + "$ref": "#/definitions/Milvus2DocumentStoreComponent" + }, + { + "$ref": "#/definitions/OpenDistroElasticsearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/OpenSearchDocumentStoreComponent" + }, + { + "$ref": "#/definitions/SQLDocumentStoreComponent" + }, + { + "$ref": "#/definitions/WeaviateDocumentStoreComponent" + }, + { + "$ref": "#/definitions/AzureConverterComponent" + }, + { + "$ref": "#/definitions/CrawlerComponent" + }, + { + "$ref": "#/definitions/DensePassageRetrieverComponent" + }, + { + "$ref": "#/definitions/Docs2AnswersComponent" + }, + { + "$ref": "#/definitions/DocxToTextConverterComponent" + }, + { + "$ref": "#/definitions/ElasticsearchFilterOnlyRetrieverComponent" + }, + { + "$ref": "#/definitions/ElasticsearchRetrieverComponent" + }, + { + "$ref": "#/definitions/EmbeddingRetrieverComponent" + }, + { + "$ref": "#/definitions/EntityExtractorComponent" + }, + { + "$ref": "#/definitions/EvalAnswersComponent" + }, + { + "$ref": "#/definitions/EvalDocumentsComponent" + }, + { + "$ref": "#/definitions/FARMReaderComponent" + }, + { + "$ref": "#/definitions/FileTypeClassifierComponent" 
+ }, + { + "$ref": "#/definitions/ImageToTextConverterComponent" + }, + { + "$ref": "#/definitions/JoinAnswersComponent" + }, + { + "$ref": "#/definitions/JoinDocumentsComponent" + }, + { + "$ref": "#/definitions/MarkdownConverterComponent" + }, + { + "$ref": "#/definitions/PDFToTextConverterComponent" + }, + { + "$ref": "#/definitions/PDFToTextOCRConverterComponent" + }, + { + "$ref": "#/definitions/ParsrConverterComponent" + }, + { + "$ref": "#/definitions/PreProcessorComponent" + }, + { + "$ref": "#/definitions/QuestionGeneratorComponent" + }, + { + "$ref": "#/definitions/RAGeneratorComponent" + }, + { + "$ref": "#/definitions/RCIReaderComponent" + }, + { + "$ref": "#/definitions/RouteDocumentsComponent" + }, + { + "$ref": "#/definitions/SentenceTransformersRankerComponent" + }, + { + "$ref": "#/definitions/Seq2SeqGeneratorComponent" + }, + { + "$ref": "#/definitions/SklearnQueryClassifierComponent" + }, + { + "$ref": "#/definitions/TableReaderComponent" + }, + { + "$ref": "#/definitions/TableTextRetrieverComponent" + }, + { + "$ref": "#/definitions/Text2SparqlRetrieverComponent" + }, + { + "$ref": "#/definitions/TextConverterComponent" + }, + { + "$ref": "#/definitions/TfidfRetrieverComponent" + }, + { + "$ref": "#/definitions/TikaConverterComponent" + }, + { + "$ref": "#/definitions/TransformersDocumentClassifierComponent" + }, + { + "$ref": "#/definitions/TransformersQueryClassifierComponent" + }, + { + "$ref": "#/definitions/TransformersReaderComponent" + }, + { + "$ref": "#/definitions/TransformersSummarizerComponent" + }, + { + "$ref": "#/definitions/TransformersTranslatorComponent" + } + ] + }, + "required": [ + "type", + "name" + ], + "additionalProperties": true + }, + "pipelines": { + "title": "Pipelines", + "description": "Multiple pipelines can be defined using the components from the same YAML file.", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Name of the pipeline.", + 
"type": "string" + }, + "nodes": { + "title": "Nodes", + "description": "Nodes to be used by this particular pipeline", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "The name of this particular node in the pipeline. This should be one of the names from the components defined in the same file.", + "type": "string" + }, + "inputs": { + "title": "Inputs", + "description": "Input parameters for this node.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "name", + "inputs" + ], + "additionalProperties": false + }, + "required": [ + "name", + "nodes" + ], + "additionalProperties": false + }, + "additionalProperties": false + }, + "additionalProperties": false + } + } + }, + "required": [ + "version", + "components", + "pipelines" + ], + "additionalProperties": false, + "definitions": { + "DeepsetCloudDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DeepsetCloudDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "api_key": { + "title": "Api Key", + "type": "string" + }, + "workspace": { + "title": "Workspace", + "default": "default", + "type": "string" + }, + "index": { + "title": "Index", + "default": "default", + "type": "string" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "api_endpoint": { + "title": "Api Endpoint", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "localhost", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "port": { + "title": "Port", + "default": 9200, + "anyOf": [ + { + "type": "integer" + }, + { + "type": "array", + "items": { + "type": "integer" + } + } + ] + }, + "username": { + "title": "Username", + "default": "", + "type": "string" + }, + "password": { + "title": "Password", + "default": "", + "type": "string" + }, + "api_key_id": { + "title": "Api Key Id", + "type": "string" + }, + "api_key": { + "title": "Api Key", + "type": "string" + }, + "aws4auth": { + "title": "Aws4Auth" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "search_fields": { + "title": "Search Fields", + "default": "content", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": {} + } + ] + }, + "content_field": { + "title": "Content Field", + "default": "content", + "type": "string" + }, + "name_field": { + "title": "Name Field", + "default": "name", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "custom_mapping": { + "title": "Custom Mapping", + "type": "object" + }, + "excluded_meta_data": { + "title": "Excluded Meta Data", + "type": "array", + "items": {} + }, + "analyzer": { + "title": "Analyzer", + "default": "standard", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "http", + 
"type": "string" + }, + "ca_certs": { + "title": "Ca Certs", + "type": "string" + }, + "verify_certs": { + "title": "Verify Certs", + "default": true, + "type": "boolean" + }, + "recreate_index": { + "title": "Recreate Index", + "default": false, + "type": "boolean" + }, + "create_index": { + "title": "Create Index", + "default": true, + "type": "boolean" + }, + "refresh_type": { + "title": "Refresh Type", + "default": "wait_for", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product" + }, + "timeout": { + "title": "Timeout", + "default": 30 + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "flat", + "type": "string" + }, + "scroll": { + "title": "Scroll", + "default": "1d", + "type": "string" + }, + "skip_missing_embeddings": { + "title": "Skip Missing Embeddings", + "default": true, + "type": "boolean" + }, + "synonyms": { + "title": "Synonyms", + "type": "array", + "items": {} + }, + "synonym_type": { + "title": "Synonym Type", + "default": "synonym", + "type": "string" + }, + "use_system_proxy": { + "title": "Use System Proxy", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FAISSDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FAISSDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "sql_url": { + "title": "Sql Url", + "default": "sqlite:///faiss_document_store.db", + "type": "string" + }, + "vector_dim": { + "title": "Vector Dim", + "type": "integer" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "faiss_index_factory_str": { + "title": "Faiss Index Factory Str", + "default": "Flat", + "type": "string" + }, + "faiss_index": { + "title": "Faiss Index", + "type": "string", + "default": null + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "faiss_index_path": { + "title": "Faiss Index Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "faiss_config_path": { + "title": "Faiss Config Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "GraphDBKnowledgeGraphComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "GraphDBKnowledgeGraph" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "localhost", + "type": "string" + }, + "port": { + "title": "Port", + "default": 7200, + "type": "integer" + }, + "username": { + "title": "Username", + "default": "", + "type": "string" + }, + "password": { + "title": "Password", + "default": "", + "type": "string" + }, + "index": { + "title": "Index", + "type": "string" + }, + "prefixes": { + "title": "Prefixes", + "default": "", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "InMemoryDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "InMemoryDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "scoring_batch_size": { + "title": "Scoring Batch Size", + "default": 500000, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Milvus2DocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Milvus2DocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "sql_url": { + "title": "Sql Url", + "default": "sqlite:///", + "type": "string" + }, + "host": { + "title": "Host", + "default": "localhost", + "type": "string" + }, + "port": { + "title": "Port", + "default": "19530", + "type": "string" + }, + "connection_pool": { + "title": "Connection Pool", + "default": "SingletonThread", + "type": "string" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "vector_dim": { + "title": "Vector Dim", + "type": "integer" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "index_file_size": { + "title": "Index File Size", + "default": 1024, + "type": "integer" + }, + "similarity": { + "title": "Similarity", + "default": "dot_product", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "IVF_FLAT", + "type": "string" + }, + "index_param": { + "title": "Index Param", + "type": "object" + }, + "search_param": { + "title": "Search Param", + "type": "object" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "id_field": { + "title": "Id Field", + "default": "id", + "type": "string" + }, + "custom_fields": { + "title": "Custom Fields", + "type": "array", + "items": {} + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + }, + "consistency_level": { + 
"title": "Consistency Level", + "default": 0, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "OpenDistroElasticsearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "OpenDistroElasticsearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "https://admin:admin@localhost:9200/" + }, + "similarity": { + "title": "Similarity", + "default": "cosine" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "OpenSearchDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "OpenSearchDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "verify_certs": { + "title": "Verify Certs", + "default": false + }, + "scheme": { + "title": "Scheme", + "default": "https" + }, + "username": { + "title": "Username", + "default": "admin" + }, + "password": { + "title": "Password", + "default": "admin" + }, + "port": { + "title": "Port", + "default": 9200 + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SQLDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SQLDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "url": { + "title": "Url", + "default": "sqlite://", + "type": "string" + }, + "index": { + "title": "Index", + "default": "document", + "type": "string" + }, + "label_index": { + "title": "Label Index", + "default": "label", + "type": "string" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + }, + "check_same_thread": { + "title": "Check Same Thread", + "default": false, + "type": "boolean" + }, + "isolation_level": { + "title": "Isolation Level", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "WeaviateDocumentStoreComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "WeaviateDocumentStore" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "host": { + "title": "Host", + "default": "http://localhost", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "port": { + "title": "Port", + "default": 8080, + "anyOf": [ + { + "type": "integer" + }, + { + "type": "array", + "items": { + "type": "integer" + } + } + ] + }, + "timeout_config": { + "title": "Timeout Config", + "default": [ + 5, + 15 + ], + "type": "array", + "items": {} + }, + "username": { + "title": "Username", + "type": "string" + }, + "password": { + "title": "Password", + "type": "string" + }, + "index": { + "title": "Index", + "default": "Document", + "type": "string" + }, + "embedding_dim": { + "title": "Embedding Dim", + "default": 768, + "type": "integer" + }, + "content_field": { + "title": "Content Field", + "default": "content", + "type": "string" + }, + "name_field": { + "title": "Name Field", + "default": "name", + "type": "string" + }, + "similarity": { + "title": "Similarity", + "default": "cosine", + "type": "string" + }, + "index_type": { + "title": "Index Type", + "default": "hnsw", + "type": "string" + }, + "custom_schema": { + "title": "Custom Schema", + "type": "object" + }, + "return_embedding": { + "title": "Return Embedding", + "default": false, + "type": "boolean" + }, + "embedding_field": { + "title": "Embedding Field", + "default": "embedding", + "type": "string" + }, + "progress_bar": { + "title": "Progress Bar", + "default": 
true, + "type": "boolean" + }, + "duplicate_documents": { + "title": "Duplicate Documents", + "default": "overwrite", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "AzureConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "AzureConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "endpoint": { + "title": "Endpoint", + "type": "string" + }, + "credential_key": { + "title": "Credential Key", + "type": "string" + }, + "model_id": { + "title": "Model Id", + "default": "prebuilt-document", + "type": "string" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + }, + "save_json": { + "title": "Save Json", + "default": false, + "type": "boolean" + }, + "preceding_context_len": { + "title": "Preceding Context Len", + "default": 3, + "type": "integer" + }, + "following_context_len": { + "title": "Following Context Len", + "default": 3, + "type": "integer" + }, + "merge_multiple_column_headers": { + "title": "Merge Multiple Column Headers", + "default": true, + "type": "boolean" + } + }, + "required": [ + "endpoint", + "credential_key" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "CrawlerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Crawler" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "output_dir": { + "title": "Output Dir", + "type": "string" + }, + "urls": { + "title": "Urls", + "type": "array", + "items": { + "type": "string" + } + }, + "crawler_depth": { + "title": "Crawler Depth", + "default": 1, + "type": "integer" + }, + "filter_urls": { + "title": "Filter Urls", + "type": "array", + "items": {} + }, + "overwrite_existing_files": { + "title": "Overwrite Existing Files", + "default": true + } + }, + "required": [ + "output_dir" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "DensePassageRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DensePassageRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "query_embedding_model": { + "title": "Query Embedding Model", + "default": "facebook/dpr-question_encoder-single-nq-base", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "passage_embedding_model": { + "title": "Passage Embedding Model", + "default": "facebook/dpr-ctx_encoder-single-nq-base", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "max_seq_len_query": { + "title": "Max Seq Len Query", + "default": 64, + "type": "integer" + }, + "max_seq_len_passage": { + "title": "Max Seq Len Passage", + "default": 256, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 16, + "type": "integer" + }, + "embed_title": { + "title": "Embed Title", + "default": true, + "type": "boolean" + }, + "use_fast_tokenizers": { + "title": "Use Fast Tokenizers", + "default": true, + "type": "boolean" + }, + "infer_tokenizer_classes": { + "title": "Infer Tokenizer Classes", + "default": false, + "type": "boolean" + }, + "similarity_function": { + "title": "Similarity Function", + "default": "dot_product", + "type": "string" + }, + "global_loss_buffer_size": { + "title": "Global Loss Buffer Size", + "default": 150000, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + 
"type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Docs2AnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Docs2Answers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": {}, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "DocxToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "DocxToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchFilterOnlyRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchFilterOnlyRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "custom_query": { + "title": "Custom Query", + "type": "string" + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ElasticsearchRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ElasticsearchRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "custom_query": { + "title": "Custom Query", + "type": "string" + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EmbeddingRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EmbeddingRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "embedding_model": { + "title": "Embedding Model", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 32, + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 512, + "type": "integer" + }, + "model_format": { + "title": "Model Format", + "default": "farm", + "type": "string" + }, + "pooling_strategy": { + "title": "Pooling Strategy", + "default": "reduce_mean", + "type": "string" + }, + "emb_extraction_layer": { + "title": "Emb Extraction Layer", + "default": -1, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "document_store", + "embedding_model" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EntityExtractorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EntityExtractor" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "dslim/bert-base-NER", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EvalAnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EvalAnswers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "skip_incorrect_retrieval": { + "title": "Skip Incorrect Retrieval", + "default": true, + "type": "boolean" + }, + "open_domain": { + "title": "Open Domain", + "default": true, + "type": "boolean" + }, + "sas_model": { + "title": "Sas Model", + "type": "string" + }, + "debug": { + "title": "Debug", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "EvalDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "EvalDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "debug": { + "title": "Debug", + "default": false, + "type": "boolean" + }, + "open_domain": { + "title": "Open Domain", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FARMReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FARMReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "context_window_size": { + "title": "Context Window Size", + "default": 150, + "type": "integer" + }, + "batch_size": { + "title": "Batch Size", + "default": 50, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "no_ans_boost": { + "title": "No Ans Boost", + "default": 0.0, + "type": "number" + }, + "return_no_answer": { + "title": "Return No Answer", + "default": false, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 3, + "type": "integer" + }, + "top_k_per_sample": { + "title": "Top K Per Sample", + "default": 1, + "type": "integer" + }, + "num_processes": { + "title": "Num Processes", + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + }, + "doc_stride": { + "title": "Doc Stride", + "default": 128, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "duplicate_filtering": { + "title": "Duplicate Filtering", + "default": 0, + "type": "integer" + }, + "use_confidence_scores": { + "title": "Use Confidence Scores", + "default": true, + "type": "boolean" + }, + "proxies": { + "title": "Proxies", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "local_files_only": { + "title": "Local Files Only", + "default": false + }, + "force_download": { + "title": "Force Download", + "default": false + }, + 
"use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "FileTypeClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "FileTypeClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "supported_types": { + "title": "Supported Types", + "default": [ + "txt", + "pdf", + "md", + "docx", + "html" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ImageToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ImageToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "default": [ + "eng" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "JoinAnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "JoinAnswers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "join_mode": { + "title": "Join Mode", + "default": "concatenate", + "type": "string" + }, + "weights": { + "title": "Weights", + "type": "array", + "items": { + "type": "number" + } + }, + "top_k_join": { + "title": "Top K Join", + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "JoinDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "JoinDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "join_mode": { + "title": "Join Mode", + "default": "concatenate", + "type": "string" + }, + "weights": { + "title": "Weights", + "type": "array", + "items": { + "type": "number" + } + }, + "top_k_join": { + "title": "Top K Join", + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "MarkdownConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "MarkdownConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PDFToTextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PDFToTextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PDFToTextOCRConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PDFToTextOCRConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "default": [ + "eng" + ], + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "ParsrConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "ParsrConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "parsr_url": { + "title": "Parsr Url", + "default": "http://localhost:3001", + "type": "string" + }, + "extractor": { + "title": "Extractor", + "default": "pdfminer", + "enum": [ + "pdfminer", + "pdfjs" + ], + "type": "string" + }, + "table_detection_mode": { + "title": "Table Detection Mode", + "default": "lattice", + "enum": [ + "lattice", + "stream" + ], + "type": "string" + }, + "preceding_context_len": { + "title": "Preceding Context Len", + "default": 3, + "type": "integer" + }, + "following_context_len": { + "title": "Following Context Len", + "default": 3, + "type": "integer" + }, + "remove_page_headers": { + "title": "Remove Page Headers", + "default": false, + "type": "boolean" + }, + "remove_page_footers": { + "title": "Remove Page Footers", + "default": false, + "type": "boolean" + }, + "remove_table_of_contents": { + "title": "Remove Table Of Contents", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "PreProcessorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "PreProcessor" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "clean_whitespace": { + "title": "Clean Whitespace", + "default": true, + "type": "boolean" + }, + "clean_header_footer": { + "title": "Clean Header Footer", + "default": false, + "type": "boolean" + }, + "clean_empty_lines": { + "title": "Clean Empty Lines", + "default": true, + "type": "boolean" + }, + "remove_substrings": { + "title": "Remove Substrings", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "split_by": { + "title": "Split By", + "default": "word", + "type": "string" + }, + "split_length": { + "title": "Split Length", + "default": 200, + "type": "integer" + }, + "split_overlap": { + "title": "Split Overlap", + "default": 0, + "type": "integer" + }, + "split_respect_sentence_boundary": { + "title": "Split Respect Sentence Boundary", + "default": true, + "type": "boolean" + }, + "language": { + "title": "Language", + "default": "en", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "QuestionGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "QuestionGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "valhalla/t5-base-e2e-qg" + }, + "model_version": { + "title": "Model Version" + }, + "num_beams": { + "title": "Num Beams", + "default": 4 + }, + "max_length": { + "title": "Max Length", + "default": 256 + }, + "no_repeat_ngram_size": { + "title": "No Repeat Ngram Size", + "default": 3 + }, + "length_penalty": { + "title": "Length Penalty", + "default": 1.5 + }, + "early_stopping": { + "title": "Early Stopping", + "default": true + }, + "split_length": { + "title": "Split Length", + "default": 50 + }, + "split_overlap": { + "title": "Split Overlap", + "default": 10 + }, + "use_gpu": { + "title": "Use Gpu", + "default": true + }, + "prompt": { + "title": "Prompt", + "default": "generate questions:" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RAGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RAGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "facebook/rag-token-nq", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "retriever": { + "title": "Retriever", + "type": "string", + "default": null + }, + "generator_type": { + "default": [ + 1 + ], + "allOf": [ + { + "$ref": "#/definitions/RAGeneratorType" + } + ] + }, + "top_k": { + "title": "Top K", + "default": 2, + "type": "integer" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 2, + "type": "integer" + }, + "num_beams": { + "title": "Num Beams", + "default": 2, + "type": "integer" + }, + "embed_title": { + "title": "Embed Title", + "default": true, + "type": "boolean" + }, + "prefix": { + "title": "Prefix", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RAGeneratorType": { + "title": "RAGeneratorType", + "description": "An enumeration.", + "enum": [ + [ + 1 + ], + 2 + ] + }, + "RCIReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RCIReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "row_model_name_or_path": { + "title": "Row Model Name Or Path", + "default": "michaelrglass/albert-base-rci-wikisql-row", + "type": "string" + }, + "column_model_name_or_path": { + "title": "Column Model Name Or Path", + "default": "michaelrglass/albert-base-rci-wikisql-col", + "type": "string" + }, + "row_model_version": { + "title": "Row Model Version", + "type": "string" + }, + "column_model_version": { + "title": "Column Model Version", + "type": "string" + }, + "row_tokenizer": { + "title": "Row Tokenizer", + "type": "string" + }, + "column_tokenizer": { + "title": "Column Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "RouteDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RouteDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "split_by": { + "title": "Split By", + "default": "content_type", + "type": "string" + }, + "metadata_values": { + "title": "Metadata Values", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SentenceTransformersRankerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SentenceTransformersRanker" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "format": "path" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Seq2SeqGeneratorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Seq2SeqGenerator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "input_converter": { + "title": "Input Converter", + "type": "string", + "default": null + }, + "top_k": { + "title": "Top K", + "default": 1, + "type": "integer" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 2, + "type": "integer" + }, + "num_beams": { + "title": "Num Beams", + "default": 8, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "SklearnQueryClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SklearnQueryClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "https://ext-models-haystack.s3.eu-central-1.amazonaws.com/gradboost_query_classifier/model.pickle", + "anyOf": [ + { + "type": "string" + }, + {} + ] + }, + "vectorizer_name_or_path": { + "title": "Vectorizer Name Or Path", + "default": "https://ext-models-haystack.s3.eu-central-1.amazonaws.com/gradboost_query_classifier/vectorizer.pickle", + "anyOf": [ + { + "type": "string" + }, + {} + ] + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TableReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TableReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "google/tapas-base-finetuned-wtq", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 3, + "type": "integer" + }, + "return_no_answer": { + "title": "Return No Answer", + "default": false, + "type": "boolean" + }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TableTextRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TableTextRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "query_embedding_model": { + "title": "Query Embedding Model", + "default": "deepset/bert-small-mm_retrieval-question_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "passage_embedding_model": { + "title": "Passage Embedding Model", + "default": "deepset/bert-small-mm_retrieval-passage_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "table_embedding_model": { + "title": "Table Embedding Model", + "default": "deepset/bert-small-mm_retrieval-table_encoder", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "max_seq_len_query": { + "title": "Max Seq Len Query", + "default": 64, + "type": "integer" + }, + "max_seq_len_passage": { + "title": "Max Seq Len Passage", + "default": 256, + "type": "integer" + }, + "max_seq_len_table": { + "title": "Max Seq Len Table", + "default": 256, + "type": "integer" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "batch_size": { + "title": "Batch Size", + "default": 16, + "type": "integer" + }, + "embed_meta_fields": { + "title": "Embed Meta Fields", + "default": [ + "name", + "section_title", + "caption" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "use_fast_tokenizers": { + "title": "Use Fast Tokenizers", + "default": true, + "type": "boolean" + }, + "infer_tokenizer_classes": { + "title": "Infer 
Tokenizer Classes", + "default": false, + "type": "boolean" + }, + "similarity_function": { + "title": "Similarity Function", + "default": "dot_product", + "type": "string" + }, + "global_loss_buffer_size": { + "title": "Global Loss Buffer Size", + "default": 150000, + "type": "integer" + }, + "progress_bar": { + "title": "Progress Bar", + "default": true, + "type": "boolean" + }, + "devices": { + "title": "Devices", + "type": "array", + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "string" + } + ] + } + }, + "use_auth_token": { + "title": "Use Auth Token", + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "Text2SparqlRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "Text2SparqlRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "knowledge_graph": { + "title": "Knowledge Graph" + }, + "model_name_or_path": { + "title": "Model Name Or Path" + }, + "top_k": { + "title": "Top K", + "default": 1, + "type": "integer" + } + }, + "required": [ + "knowledge_graph", + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TextConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TextConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TfidfRetrieverComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TfidfRetriever" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "document_store": { + "title": "Document Store", + "type": "string" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "auto_fit": { + "title": "Auto Fit", + "default": true + } + }, + "required": [ + "document_store" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TikaConverterComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TikaConverter" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "tika_url": { + "title": "Tika Url", + "default": "http://localhost:9998/tika", + "type": "string" + }, + "remove_numeric_tables": { + "title": "Remove Numeric Tables", + "default": false, + "type": "boolean" + }, + "valid_languages": { + "title": "Valid Languages", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersDocumentClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersDocumentClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "bhadresh-savani/distilbert-base-uncased-emotion", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "return_all_scores": { + "title": "Return All Scores", + "default": false, + "type": "boolean" + }, + "task": { + "title": "Task", + "default": "text-classification", + "type": "string" + }, + "labels": { + "title": "Labels", + "type": "array", + "items": { + "type": "string" + } + }, + "batch_size": { + "title": "Batch Size", + "default": -1, + "type": "integer" + }, + "classification_field": { + "title": "Classification Field", + "type": "string" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersQueryClassifierComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersQueryClassifier" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "shahrukhx01/bert-mini-finetune-question-detection", + "anyOf": [ + { + "type": "string", + "format": "path" + }, + { + "type": "string" + } + ] + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersReaderComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersReader" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "distilbert-base-uncased-distilled-squad", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "context_window_size": { + "title": "Context Window Size", + "default": 70, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "top_k": { + "title": "Top K", + "default": 10, + "type": "integer" + }, + "top_k_per_candidate": { + "title": "Top K Per Candidate", + "default": 4, + "type": "integer" + }, + "return_no_answers": { + "title": "Return No Answers", + "default": true, + "type": "boolean" 
+ }, + "max_seq_len": { + "title": "Max Seq Len", + "default": 256, + "type": "integer" + }, + "doc_stride": { + "title": "Doc Stride", + "default": 128, + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersSummarizerComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersSummarizer" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "default": "google/pegasus-xsum", + "type": "string" + }, + "model_version": { + "title": "Model Version", + "type": "string" + }, + "tokenizer": { + "title": "Tokenizer", + "type": "string" + }, + "max_length": { + "title": "Max Length", + "default": 200, + "type": "integer" + }, + "min_length": { + "title": "Min Length", + "default": 5, + "type": "integer" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + }, + "clean_up_tokenization_spaces": { + "title": "Clean Up Tokenization Spaces", + "default": true, + "type": "boolean" + }, + "separator_for_single_summary": { + "title": "Separator For Single Summary", + "default": " ", + "type": "string" + }, + "generate_single_summary": { + "title": "Generate Single Summary", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, + "TransformersTranslatorComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "TransformersTranslator" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "model_name_or_path": { + "title": "Model Name Or Path", + "type": "string" + }, + "tokenizer_name": { + "title": "Tokenizer Name", + "type": "string" + }, + "max_seq_len": { + "title": "Max Seq Len", + "type": "integer" + }, + "clean_up_tokenization_spaces": { + "title": "Clean Up Tokenization Spaces", + "default": true, + "type": "boolean" + }, + "use_gpu": { + "title": "Use Gpu", + "default": true, + "type": "boolean" + } + }, + "required": [ + "model_name_or_path" + ], + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + } + } +} \ No newline at end of file diff --git a/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json b/haystack/json-schemas/haystack-pipeline-1.3.0.schema.json similarity index 99% rename from haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json rename to haystack/json-schemas/haystack-pipeline-1.3.0.schema.json index c3f472ff2c..11f6fd4904 100644 --- a/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json +++ b/haystack/json-schemas/haystack-pipeline-1.3.0.schema.json @@ -9,17 +9,7 @@ "title": "Version", "description": "Version of the Haystack Pipeline file.", "type": "string", - "oneOf": [ - { - "const": "1.2.1rc0" - }, - { - "const": "1.3.0" - }, - { - "const": "1.3.1rc0" - } - ] + "const": "1.3.0" }, "components": { "title": "Components", diff --git a/haystack/json-schemas/haystack-pipeline-unstable.schema.json b/haystack/json-schemas/haystack-pipeline-master.schema.json similarity index 99% rename from haystack/json-schemas/haystack-pipeline-unstable.schema.json rename to haystack/json-schemas/haystack-pipeline-master.schema.json index 8924481e99..8d3c12546e 100644 --- a/haystack/json-schemas/haystack-pipeline-unstable.schema.json +++ b/haystack/json-schemas/haystack-pipeline-master.schema.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://haystack.deepset.ai/haystack/json-schemas/haystack-pipeline-unstable.schema.json", + "$id": "https://haystack.deepset.ai/haystack/json-schemas/haystack-pipeline-master.schema.json", "title": "Haystack Pipeline", "description": "Haystack Pipeline YAML file describing the nodes of the pipelines. 
For more info read the docs at: https://haystack.deepset.ai/components/pipelines#yaml-file-definitions", "type": "object", @@ -9,20 +9,7 @@ "title": "Version", "description": "Version of the Haystack Pipeline file.", "type": "string", - "oneOf": [ - { - "const": "unstable" - }, - { - "const": "1.2.1rc0" - }, - { - "const": "1.3.0" - }, - { - "const": "1.3.1rc0" - } - ] + "const": "master" }, "components": { "title": "Components", diff --git a/haystack/json-schemas/haystack-pipeline.schema.json b/haystack/json-schemas/haystack-pipeline.schema.json index bfc8490ad4..dc3710f0f3 100644 --- a/haystack/json-schemas/haystack-pipeline.schema.json +++ b/haystack/json-schemas/haystack-pipeline.schema.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://haystack.deepset.ai/json-schemas/haystack-pipeline-1.1.0.schema.json", + "$id": "https://haystack.deepset.ai/json-schemas/haystack-pipeline.schema.json", "title": "Haystack Pipeline", "description": "Haystack Pipeline YAML file describing the nodes of the pipelines. 
For more info read the docs at: https://haystack.deepset.ai/components/pipelines#yaml-file-definitions", "type": "object", @@ -10,16 +10,12 @@ { "properties": { "version": { - "oneOf": [ - { - "const": "unstable" - } - ] + "const": "master" } } }, { - "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-unstable.schema.json" + "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-master.schema.json" } ] }, @@ -28,17 +24,7 @@ { "properties": { "version": { - "oneOf": [ - { - "const": "1.0.0" - }, - { - "const": "1.1.0" - }, - { - "const": "1.2.0" - } - ] + "const": "1.0.0" } } }, @@ -52,22 +38,40 @@ { "properties": { "version": { - "oneOf": [ - { - "const": "1.2.1rc0" - }, - { - "const": "1.3.0" - }, - { - "const": "1.3.1rc0" - } - ] + "const": "1.1.0" } } }, { - "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.2.1rc0.schema.json" + "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.1.0.schema.json" + } + ] + }, + { + "allOf": [ + { + "properties": { + "version": { + "const": "1.2.0" + } + } + }, + { + "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.2.0.schema.json" + } + ] + }, + { + "allOf": [ + { + "properties": { + "version": { + "const": "1.3.0" + } + } + }, + { + "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.3.0.schema.json" } ] } diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index c0f2b5f3ad..9b3bc27b63 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -143,24 +143,26 @@ def build_component_dependency_graph( return graph -def validate_yaml(path: Path): +def validate_yaml(path: Path, strict_version: bool = False): """ Validates the given YAML file using the autogenerated JSON schema. 
:param pipeline_config: the configuration to validate + :param strict_version: whether to fail in case of a version mismatch (throws a warning otherwise) :return: None if validation is successful :raise: `PipelineConfigError` in case of issues. """ pipeline_config = read_pipeline_config_from_yaml(path) - validate_config(pipeline_config=pipeline_config) + validate_config(pipeline_config=pipeline_config, strict_version=strict_version) logging.debug(f"'{path}' contains valid Haystack pipelines.") -def validate_config(pipeline_config: Dict) -> None: +def validate_config(pipeline_config: Dict, strict_version: bool False) -> None: """ Validates the given configuration using the autogenerated JSON schema. :param pipeline_config: the configuration to validate + :param strict_version: whether to fail in case of a version mismatch (throws a warning otherwise) :return: None if validation is successful :raise: `PipelineConfigError` in case of issues. """ @@ -169,28 +171,34 @@ def validate_config(pipeline_config: Dict) -> None: with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-unstable.schema.json", "r") as schema_file: schema = json.load(schema_file) - compatible_versions = [version["const"].replace('"', "") for version in schema["properties"]["version"]["oneOf"]] - loaded_custom_nodes = [] + if not strict_version: + schema["required"] = [attribute for attribute in schema["required"] if attribute != "version"] + try: + del pipeline_config["version"] + except KeyError: + raise PipelineConfigError( + "Your pipeline configuration doesn't have a version. " + "If you're loading a pipeline from YAML, make sure to have a top-level parameter " + "called 'version' in your YAML file and specify which Haystack version you're using." 
+ ) + loaded_custom_nodes = [] while True: - try: Draft7Validator(schema).validate(instance=pipeline_config) - if pipeline_config["version"] == "unstable": + if pipeline_config["version"] != __version__: + if strict_version: + raise PipelineConfigError( + f"Cannot load pipeline configuration of version {pipeline_config['version']} " + f"in Haystack version {__version__}\n" + "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " + "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " + "and fix your configuration accordingly." + ) logging.warning( - "You seem to be using the 'unstable' version of the schema to validate " - "your pipeline configuration.\n" - "This is NOT RECOMMENDED in production environments, as pipelines " - "might manage to load and then misbehave without warnings.\n" - f"Please pin your configurations to '{__version__}' to ensure stability." - ) - - elif pipeline_config["version"] not in compatible_versions: - raise PipelineConfigError( - f"Cannot load pipeline configuration of version {pipeline_config['version']} " - f"in Haystack version {__version__} " - f"(only versions {compatible_versions} are compatible with this Haystack release).\n" + f"This pipeline is version {pipeline_config['version']}, but you're using Haystack {__version__}\n" + "This might cause bugs and unexpected behaviors." "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " "and fix your configuration accordingly." 
From 5b273521ad98b797475ee4cb14641ea8137d42a2 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 1 Apr 2022 18:48:33 +0200 Subject: [PATCH 02/22] Simplify update_json_schema --- haystack/nodes/_json_schema.py | 210 +++++++-------------------------- 1 file changed, 40 insertions(+), 170 deletions(-) diff --git a/haystack/nodes/_json_schema.py b/haystack/nodes/_json_schema.py index 4e878d4022..23c8bcf7bc 100644 --- a/haystack/nodes/_json_schema.py +++ b/haystack/nodes/_json_schema.py @@ -1,20 +1,15 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type import logging - -from sqlalchemy import schema +from isort import file logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -import os -import re import sys import json import inspect from pathlib import Path -from copy import deepcopy -from difflib import SequenceMatcher import pydantic.schema from pydantic import BaseConfig, BaseSettings, Required, SecretStr, create_model @@ -220,7 +215,7 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[ def get_json_schema( - filename: str, compatible_versions: List[str], modules: List[str] = ["haystack.document_stores", "haystack.nodes"] + filename: str, version: str, modules: List[str] = ["haystack.document_stores", "haystack.nodes"] ): """ Generate JSON schema for Haystack pipelines. 
@@ -248,7 +243,7 @@ def get_json_schema( "title": "Version", "description": "Version of the Haystack Pipeline file.", "type": "string", - "oneOf": [{"const": version} for version in compatible_versions], + "const": version, }, "components": { "title": "Components", @@ -325,171 +320,46 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st return schema -def natural_sort(list_to_sort: List[str]) -> List[str]: - """Sorts a list keeping numbers in the correct numerical order""" - convert = lambda text: int(text) if text.isdigit() else text.lower() - alphanumeric_key = lambda key: [convert(c) for c in re.split("([0-9]+)", key)] - return sorted(list_to_sort, key=alphanumeric_key) - - -def load(path: Path) -> Dict[str, Any]: - """Shorthand for loading a JSON""" - with open(path, "r") as json_file: - return json.load(json_file) - - -def dump(data: Dict[str, Any], path: Path) -> None: - """Shorthand for dumping to JSON""" - with open(path, "w") as json_file: - json.dump(data, json_file, indent=2) - -def new_version_entry(version): +def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH): """ - Returns a new entry for the version index JSON schema. + If the version contains "rc", only update the master's schema. + Otherwise, create (or update) a new schema. 
""" - return { - "allOf": [ - {"properties": {"version": {"oneOf": [{"const": version}]}}}, - { - "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/" - f"haystack-pipeline-{version}.schema.json" - }, - ] - } - - -def update_json_schema( - update_index: bool, destination_path: Path = JSON_SCHEMAS_PATH, index_name: str = "haystack-pipeline.schema.json" -): - # Locate the latest schema's path - latest_schema_path = destination_path / Path( - natural_sort(os.listdir(destination_path))[-3] - ) # -1 is index, -2 is unstable - logger.info(f"Latest schema: {latest_schema_path}") - latest_schema = load(latest_schema_path) - - # List the versions supported by the last schema - supported_versions_block = deepcopy(latest_schema["properties"]["version"]["oneOf"]) - supported_versions = [entry["const"].replace('"', "") for entry in supported_versions_block] - logger.info(f"Versions supported by this schema: {supported_versions}") - - # Create new schema with the same filename and versions embedded, to be identical to the latest one. - new_schema = get_json_schema(latest_schema_path.name, supported_versions) - - # Check for backwards compatibility with difflib's SequenceMatcher - # (https://docs.python.org/3/library/difflib.html#difflib.SequenceMatcher) - # If the opcodes contain only "insert" and "equal", that means the new schema - # only added lines and did not remove anything from the previous schema. - # We decided that additions only imply backwards compatibility. - # Any other opcode ("replace", "delete") imply that something has been removed - # in the new schema, which breaks backwards compatibility and means we should - # store a new, separate schema. - # People wishing to upgrade from the older schema version will have to change - # version in their YAML to avoid failing validation. 
- latest_schema_string = json.dumps(latest_schema) - new_schema_string = json.dumps(new_schema) - matcher = SequenceMatcher(None, latest_schema_string, new_schema_string) - schema_diff = matcher.get_opcodes() - is_backwards_incompatible = any(opcode[0] not in ["insert", "equal"] for opcode in schema_diff) - - unstable_versions_block = [] - - # If the two schemas are incompatible, we need a new file. - # Update the schema's filename and supported versions, then save it. - if is_backwards_incompatible: - - # Print a quick diff to explain the differences - logger.info(f"The schemas are NOT backwards compatible. This is the list of INCOMPATIBLE changes only:") - for tag, i1, i2, j1, j2 in schema_diff: - if tag not in ["equal", "insert"]: - logger.info("{!r:>8} --> {!r}".format(latest_schema_string[i1:i2], new_schema_string[j1:j2])) - + # Update master's schema + filename = f"haystack-pipeline-master.schema.json" + with open(destination_path / filename, "w") as json_file: + json.dump(get_json_schema( + filename=filename, + version="master" + ), json_file, indent=2) + + # If it's not an rc version: + if "rc" not in haystack_version: + + # Create/update the specific version file too filename = f"haystack-pipeline-{haystack_version}.schema.json" - logger.info(f"Adding {filename} to the schema folder.") - - # Let's check if the schema changed without a version change - if haystack_version in supported_versions and len(supported_versions) > 1: - logger.info( - f"Version {haystack_version} was supported by the latest schema" - f"(supported versions: {supported_versions}). " - f"Removing support for version {haystack_version} from it." 
- ) - - supported_versions_block = [ - entry for entry in supported_versions_block if entry["const"].replace('"', "") != haystack_version - ] - latest_schema["properties"]["version"]["oneOf"] = supported_versions_block - dump(latest_schema, latest_schema_path) - - # Update the JSON schema index too - if update_index: - index = load(destination_path / index_name) - index["oneOf"][-1]["allOf"][0]["properties"]["version"]["oneOf"] = supported_versions_block - dump(index, destination_path / index_name) - - # Dump the new schema file - new_schema["$id"] = f"{SCHEMA_URL}{filename}" - unstable_versions_block = [{"const": haystack_version}] - new_schema["properties"]["version"]["oneOf"] = [{"const": haystack_version}] - dump(new_schema, destination_path / filename) - logger.info(f"Schema saved in {destination_path / filename}") - - # Update schema index with a whole new entry - if update_index: - index = load(destination_path / index_name) - new_entry = new_version_entry(haystack_version) - if all(new_entry != entry for entry in index["oneOf"]): - index["oneOf"].append(new_version_entry(haystack_version)) - dump(index, destination_path / index_name) - - # If the two schemas are compatible, no need to write a new one: - # Just add the new version to the list of versions supported by - # the latest schema if it's not there yet - else: - - # Print a quick diff to explain the differences - if not schema_diff or all(tag[0] == "equal" for tag in schema_diff): - logger.info("The schemas are identical, won't create a new file.") - else: - logger.info("The schemas are backwards compatible, overwriting the latest schema.") - logger.info("This is the list of changes:") - for tag, i1, i2, j1, j2 in schema_diff: - if tag not in "equal": - logger.info("{!r:>8} --> {!r}".format(latest_schema_string[i1:i2], new_schema_string[j1:j2])) - - # Overwrite the latest schema (safe to do for additions) - dump(new_schema, latest_schema_path) - - if haystack_version in supported_versions: - 
unstable_versions_block = supported_versions_block - logger.info( - f"Version {haystack_version} was already supported " f"(supported versions: {supported_versions})" - ) - else: - logger.info( - f"This version ({haystack_version}) was not listed " - f"(supported versions: {supported_versions}): " - "updating the supported versions list." + with open(destination_path / filename, "w") as json_file: + json.dump(get_json_schema( + filename=filename, + version=haystack_version + ), json_file, indent=2) + + # Update the index + index_name = "haystack-pipeline.schema.json" + with open(destination_path / index_name, "r") as json_file: + index = json.load(json_file) + index["oneOf"].append( + { + "allOf": [ + {"properties": {"version": {"const": haystack_version}}}, + { + "$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/" + f"haystack-pipeline-{haystack_version}.schema.json" + }, + ] + } ) + with open(destination_path / index_name, "w") as json_file: + json.dump(index, json_file, indent=2) - # Updating the latest schema's list of supported versions - supported_versions_block.append({"const": haystack_version}) - unstable_versions_block = supported_versions_block - latest_schema["properties"]["version"]["oneOf"] = supported_versions_block - dump(latest_schema, latest_schema_path) - logger.info(f"Schema updated in {destination_path / latest_schema_path}") - - # Update the JSON schema index too - if update_index: - index = load(destination_path / index_name) - index["oneOf"][-1]["allOf"][0]["properties"]["version"]["oneOf"] = supported_versions_block - dump(index, destination_path / index_name) - - # Update the unstable schema (for tests and internal use). 
- unstable_filename = "haystack-pipeline-unstable.schema.json" - unstable_schema = deepcopy(new_schema) - unstable_schema["$id"] = f"{SCHEMA_URL}{unstable_filename}" - unstable_schema["properties"]["version"]["oneOf"] = [{"const": "unstable"}] + unstable_versions_block - dump(unstable_schema, destination_path / unstable_filename) - logger.info(f"Unstable schema saved in {destination_path / unstable_filename}") From 5833879a976d74f5641d4d64b0b9a5f5d28df28c Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 1 Apr 2022 18:54:33 +0200 Subject: [PATCH 03/22] Rename unstable into master --- haystack/pipelines/config.py | 2 +- .../pipeline_empty.haystack-pipeline.yml | 2 +- .../pipeline/pipelines.haystack-pipeline.yml | 2 +- .../pipelines_dpr.haystack-pipeline.yml | 2 +- test/samples/dc/pipeline_config.json | 2 +- test/samples/pipeline/test_pipeline.yaml | 2 +- .../test_pipeline_faiss_indexing.yaml | 2 +- .../test_pipeline_faiss_retrieval.yaml | 2 +- .../test_pipeline_tfidfretriever.yaml | 2 +- test/samples/pipeline/test_ray_pipeline.yaml | 2 +- test/test_pipeline.py | 12 ++--- test/test_pipeline_yaml.py | 48 +++++++++---------- 12 files changed, 40 insertions(+), 40 deletions(-) diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index 9b3bc27b63..a263902cc1 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -168,7 +168,7 @@ def validate_config(pipeline_config: Dict, strict_version: bool False) -> None: """ validate_config_strings(pipeline_config) - with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-unstable.schema.json", "r") as schema_file: + with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-master.schema.json", "r") as schema_file: schema = json.load(schema_file) if not strict_version: diff --git a/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml b/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml index 569f19dfad..f174283b80 100644 --- a/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml +++ 
b/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml @@ -1,5 +1,5 @@ # Dummy pipeline, used when the CI needs to load the REST API to extract the OpenAPI specs. DO NOT USE. -version: 'unstable' +version: 'master' components: - name: FileTypeClassifier diff --git a/rest_api/pipeline/pipelines.haystack-pipeline.yml b/rest_api/pipeline/pipelines.haystack-pipeline.yml index b32a62c66c..af436d7f12 100644 --- a/rest_api/pipeline/pipelines.haystack-pipeline.yml +++ b/rest_api/pipeline/pipelines.haystack-pipeline.yml @@ -1,6 +1,6 @@ # To allow your IDE to autocomplete and validate your YAML pipelines, name them as .haystack-pipeline.yml -version: 'unstable' +version: 'master' components: # define all the building-blocks for Pipeline - name: DocumentStore diff --git a/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml b/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml index 72cdf0f63f..79702f8698 100644 --- a/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml +++ b/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml @@ -1,6 +1,6 @@ # To allow your IDE to autocomplete and validate your YAML pipelines, name them as .haystack-pipeline.yml -version: 'unstable' +version: 'master' components: # define all the building-blocks for Pipeline - name: DocumentStore diff --git a/test/samples/dc/pipeline_config.json b/test/samples/dc/pipeline_config.json index 5594f4a0a5..b197a497ce 100644 --- a/test/samples/dc/pipeline_config.json +++ b/test/samples/dc/pipeline_config.json @@ -1,5 +1,5 @@ { - "version": "unstable", + "version": "master", "name": "document_retrieval_1", "components": [ { diff --git a/test/samples/pipeline/test_pipeline.yaml b/test/samples/pipeline/test_pipeline.yaml index b1306ce604..4eee9cfeee 100644 --- a/test/samples/pipeline/test_pipeline.yaml +++ b/test/samples/pipeline/test_pipeline.yaml @@ -1,4 +1,4 @@ -version: 'unstable' +version: 'master' components: - name: Reader diff --git a/test/samples/pipeline/test_pipeline_faiss_indexing.yaml 
b/test/samples/pipeline/test_pipeline_faiss_indexing.yaml index 9fb2a254f9..1584474fe7 100644 --- a/test/samples/pipeline/test_pipeline_faiss_indexing.yaml +++ b/test/samples/pipeline/test_pipeline_faiss_indexing.yaml @@ -1,4 +1,4 @@ -version: 'unstable' +version: 'master' components: - name: DPRRetriever diff --git a/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml b/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml index 89a5cbf48c..75eb784f75 100644 --- a/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml +++ b/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml @@ -1,4 +1,4 @@ -version: 'unstable' +version: 'master' components: - name: DPRRetriever diff --git a/test/samples/pipeline/test_pipeline_tfidfretriever.yaml b/test/samples/pipeline/test_pipeline_tfidfretriever.yaml index b954d42827..ae0d9530ac 100644 --- a/test/samples/pipeline/test_pipeline_tfidfretriever.yaml +++ b/test/samples/pipeline/test_pipeline_tfidfretriever.yaml @@ -1,4 +1,4 @@ -version: 'unstable' +version: 'master' components: - name: Reader diff --git a/test/samples/pipeline/test_ray_pipeline.yaml b/test/samples/pipeline/test_ray_pipeline.yaml index 3ec3864b04..8316dae3ea 100644 --- a/test/samples/pipeline/test_ray_pipeline.yaml +++ b/test/samples/pipeline/test_ray_pipeline.yaml @@ -1,4 +1,4 @@ -version: 'unstable' +version: 'master' components: - name: Reader diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 30b36cafe3..b666111d05 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -352,7 +352,7 @@ def __init__(self, document_store): def test_generate_code_simple_pipeline(): config = { - "version": "unstable", + "version": "master", "components": [ { "name": "retri", @@ -380,7 +380,7 @@ def test_generate_code_simple_pipeline(): def test_generate_code_imports(): pipeline_config = { - "version": "unstable", + "version": "master", "components": [ {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"}, {"name": "retri", "type": 
"ElasticsearchRetriever", "params": {"document_store": "DocumentStore"}}, @@ -412,7 +412,7 @@ def test_generate_code_imports(): def test_generate_code_imports_no_pipeline_cls(): pipeline_config = { - "version": "unstable", + "version": "master", "components": [ {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"}, {"name": "retri", "type": "ElasticsearchRetriever", "params": {"document_store": "DocumentStore"}}, @@ -440,7 +440,7 @@ def test_generate_code_imports_no_pipeline_cls(): def test_generate_code_comment(): pipeline_config = { - "version": "unstable", + "version": "master", "components": [ {"name": "DocumentStore", "type": "ElasticsearchDocumentStore"}, {"name": "retri", "type": "ElasticsearchRetriever", "params": {"document_store": "DocumentStore"}}, @@ -467,7 +467,7 @@ def test_generate_code_comment(): def test_generate_code_is_component_order_invariant(): pipeline_config = { - "version": "unstable", + "version": "master", "pipelines": [ { "name": "Query", @@ -522,7 +522,7 @@ def test_generate_code_is_component_order_invariant(): def test_generate_code_can_handle_weak_cyclic_pipelines(): config = { - "version": "unstable", + "version": "master", "components": [ {"name": "parent", "type": "ParentComponent", "params": {"dependent": "child"}}, {"name": "child", "type": "ChildComponent", "params": {}}, diff --git a/test/test_pipeline_yaml.py b/test/test_pipeline_yaml.py index 5342626630..5670424319 100644 --- a/test/test_pipeline_yaml.py +++ b/test/test_pipeline_yaml.py @@ -24,7 +24,7 @@ @pytest.fixture(autouse=True) def mock_json_schema(request, monkeypatch, tmp_path): """ - JSON schema with the unstable version and only mocked nodes. + JSON schema with the master version and only mocked nodes. 
""" # Do not patch integration tests if "integration" in request.keywords: @@ -40,8 +40,8 @@ def mock_json_schema(request, monkeypatch, tmp_path): monkeypatch.setattr(haystack.pipelines.config, "JSON_SCHEMAS_PATH", tmp_path) # Generate mock schema in tmp_path - filename = f"haystack-pipeline-unstable.schema.json" - test_schema = _json_schema.get_json_schema(filename=filename, compatible_versions=["unstable"]) + filename = f"haystack-pipeline-master.schema.json" + test_schema = _json_schema.get_json_schema(filename=filename, compatible_versions=["master"]) with open(tmp_path / filename, "w") as schema_file: json.dump(test_schema, schema_file, indent=4) @@ -113,7 +113,7 @@ def test_load_yaml(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: retriever type: MockRetriever @@ -214,7 +214,7 @@ def test_load_yaml_no_components(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: pipelines: - name: my_pipeline @@ -230,7 +230,7 @@ def test_load_yaml_wrong_component(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: docstore type: ImaginaryDocumentStore @@ -256,7 +256,7 @@ def __init__(self, param: int): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: custom_node type: CustomNode @@ -288,7 +288,7 @@ def abstract_method(self): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: custom_node type: CustomNode @@ -312,7 +312,7 @@ def __init__(self): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: custom_node type: BaseCustomNode @@ -336,7 +336,7 @@ 
def run(self, *a, **k): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: custom_node type: SomeCustomNode @@ -368,7 +368,7 @@ def __init__(self, other_node: OtherNode): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: other_node type: OtherNode @@ -412,7 +412,7 @@ def __init__(self, some_exotic_parameter: HelperClass = HelperClass(1)): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: custom_node type: CustomNode @@ -449,7 +449,7 @@ def __init__(self, some_exotic_parameter: HelperClass): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: custom_node type: CustomNode @@ -488,7 +488,7 @@ def __init__(self, some_exotic_parameter: Flags = None): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: custom_node type: CustomNode @@ -525,7 +525,7 @@ def __init__(self, some_exotic_parameter: Flags): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: custom_node type: CustomNode @@ -559,7 +559,7 @@ def __init__(self, some_exotic_parameter: str): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: custom_node type: CustomNode @@ -591,7 +591,7 @@ def __init__(self, some_exotic_parameter: str): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: custom_node type: CustomNode @@ -612,7 +612,7 @@ def test_load_yaml_no_pipelines(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( 
f""" - version: unstable + version: master components: - name: docstore type: MockDocumentStore @@ -628,7 +628,7 @@ def test_load_yaml_invalid_pipeline_name(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: docstore type: MockDocumentStore @@ -649,7 +649,7 @@ def test_load_yaml_pipeline_with_wrong_nodes(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: docstore type: MockDocumentStore @@ -670,7 +670,7 @@ def test_load_yaml_pipeline_not_acyclic_graph(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: retriever type: MockRetriever @@ -697,7 +697,7 @@ def test_load_yaml_wrong_root(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: retriever type: MockRetriever @@ -719,7 +719,7 @@ def test_load_yaml_two_roots(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: retriever type: MockRetriever @@ -745,7 +745,7 @@ def test_load_yaml_disconnected_component(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: unstable + version: master components: - name: docstore type: MockDocumentStore From 382931106b244bbb6cbe0ba5627ad2c11ddd8a76 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 1 Apr 2022 18:55:38 +0200 Subject: [PATCH 04/22] Typo --- haystack/pipelines/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index a263902cc1..bf66d527ba 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -157,7 +157,7 @@ def validate_yaml(path: Path, 
strict_version: bool = False): logging.debug(f"'{path}' contains valid Haystack pipelines.") -def validate_config(pipeline_config: Dict, strict_version: bool False) -> None: +def validate_config(pipeline_config: Dict, strict_version: bool = False) -> None: """ Validates the given configuration using the autogenerated JSON schema. From 07454b5b76fb9ab1956ec05af8cafc811bb4007a Mon Sep 17 00:00:00 2001 From: ZanSara Date: Fri, 1 Apr 2022 19:04:12 +0200 Subject: [PATCH 05/22] Fix KeyErrors --- haystack/nodes/_json_schema.py | 1 - haystack/pipelines/config.py | 8 +++++--- test/test_pipeline_yaml.py | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/haystack/nodes/_json_schema.py b/haystack/nodes/_json_schema.py index 23c8bcf7bc..4ada57db78 100644 --- a/haystack/nodes/_json_schema.py +++ b/haystack/nodes/_json_schema.py @@ -362,4 +362,3 @@ def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH): ) with open(destination_path / index_name, "w") as json_file: json.dump(index, json_file, indent=2) - diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index bf66d527ba..249547fe6c 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -171,6 +171,8 @@ def validate_config(pipeline_config: Dict, strict_version: bool = False) -> None with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-master.schema.json", "r") as schema_file: schema = json.load(schema_file) + pipeline_version = pipeline_config.get("version", None) + if not strict_version: schema["required"] = [attribute for attribute in schema["required"] if attribute != "version"] try: @@ -187,17 +189,17 @@ def validate_config(pipeline_config: Dict, strict_version: bool = False) -> None try: Draft7Validator(schema).validate(instance=pipeline_config) - if pipeline_config["version"] != __version__: + if pipeline_version != __version__: if strict_version: raise PipelineConfigError( - f"Cannot load pipeline configuration of version 
{pipeline_config['version']} " + f"Cannot load pipeline configuration of version {pipeline_version} " f"in Haystack version {__version__}\n" "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " "and fix your configuration accordingly." ) logging.warning( - f"This pipeline is version {pipeline_config['version']}, but you're using Haystack {__version__}\n" + f"This pipeline is version {pipeline_version}, but you're using Haystack {__version__}\n" "This might cause bugs and unexpected behaviors." "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " diff --git a/test/test_pipeline_yaml.py b/test/test_pipeline_yaml.py index 5670424319..3fadf23763 100644 --- a/test/test_pipeline_yaml.py +++ b/test/test_pipeline_yaml.py @@ -41,7 +41,7 @@ def mock_json_schema(request, monkeypatch, tmp_path): # Generate mock schema in tmp_path filename = f"haystack-pipeline-master.schema.json" - test_schema = _json_schema.get_json_schema(filename=filename, compatible_versions=["master"]) + test_schema = _json_schema.get_json_schema(filename=filename, version="master") with open(tmp_path / filename, "w") as schema_file: json.dump(test_schema, schema_file, indent=4) @@ -168,7 +168,7 @@ def test_load_yaml_missing_version(tmp_path): assert "version" in str(e) -def test_load_yaml_non_existing_version(tmp_path): +def test_load_yaml_non_existing_version(tmp_path, caplog): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( """ From da633d50f58da7f9da10b2a406cfb10550348f9f Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 5 Apr 2022 10:07:11 +0200 Subject: [PATCH 06/22] Fix tests --- test/test_pipeline_yaml.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git 
a/test/test_pipeline_yaml.py b/test/test_pipeline_yaml.py index 3fadf23763..5bcb082d61 100644 --- a/test/test_pipeline_yaml.py +++ b/test/test_pipeline_yaml.py @@ -1,4 +1,5 @@ from abc import abstractmethod +import logging import pytest import json import inspect @@ -184,12 +185,13 @@ def test_load_yaml_non_existing_version(tmp_path, caplog): - Query """ ) - with pytest.raises(PipelineConfigError) as e: + with caplog.at_level(logging.WARNING): Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml") - assert "version" in str(e) and "random" in str(e) + assert "version 1.1.0" in caplog.text + assert f"Haystack {haystack.__version__}" in caplog.text -def test_load_yaml_incompatible_version(tmp_path): +def test_load_yaml_incompatible_version(tmp_path, caplog): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( """ @@ -205,9 +207,10 @@ def test_load_yaml_incompatible_version(tmp_path): - Query """ ) - with pytest.raises(PipelineConfigError) as e: + with caplog.at_level(logging.WARNING): Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml") - assert "version" in str(e) and "1.1.0" in str(e) + assert "version random" in caplog.text + assert f"Haystack {haystack.__version__}" in caplog.text def test_load_yaml_no_components(tmp_path): From b0f0494b4d4990b9e5f40366c68d7b5734a5f26e Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Apr 2022 11:12:33 +0200 Subject: [PATCH 07/22] Prevent validate_config from changing the config to validate --- haystack/pipelines/config.py | 11 +++++++---- test/test_pipeline.py | 2 ++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index 249547fe6c..225d83b70b 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -4,6 +4,7 @@ import os import copy import logging +from copy import deepcopy from pathlib import Path from networkx import DiGraph import yaml @@ -166,17 +167,19 @@ def validate_config(pipeline_config: Dict, 
strict_version: bool = False) -> None :return: None if validation is successful :raise: `PipelineConfigError` in case of issues. """ - validate_config_strings(pipeline_config) + config_to_validate = deepcopy(pipeline_config) # version might be deleted. We copy to avoid this change from propagating. + + validate_config_strings(config_to_validate) with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-master.schema.json", "r") as schema_file: schema = json.load(schema_file) - pipeline_version = pipeline_config.get("version", None) + pipeline_version = config_to_validate.get("version", None) if not strict_version: schema["required"] = [attribute for attribute in schema["required"] if attribute != "version"] try: - del pipeline_config["version"] + del config_to_validate["version"] except KeyError: raise PipelineConfigError( "Your pipeline configuration doesn't have a version. " @@ -187,7 +190,7 @@ def validate_config(pipeline_config: Dict, strict_version: bool = False) -> None loaded_custom_nodes = [] while True: try: - Draft7Validator(schema).validate(instance=pipeline_config) + Draft7Validator(schema).validate(instance=config_to_validate) if pipeline_version != __version__: if strict_version: diff --git a/test/test_pipeline.py b/test/test_pipeline.py index b666111d05..5fa2268fed 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -1,3 +1,4 @@ +from copy import deepcopy from pathlib import Path import os @@ -516,6 +517,7 @@ def test_generate_code_is_component_order_invariant(): for components in component_orders: pipeline_config["components"] = components + code = generate_code(pipeline_config=pipeline_config, pipeline_variable_name="p", generate_imports=False) assert code == expected_code From 9955a095a264351c8ad8bb8165655361c2ab7190 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Apr 2022 11:13:59 +0200 Subject: [PATCH 08/22] Fix tests again --- test/test_pipeline_yaml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/test/test_pipeline_yaml.py b/test/test_pipeline_yaml.py index 5bcb082d61..5b0a249a46 100644 --- a/test/test_pipeline_yaml.py +++ b/test/test_pipeline_yaml.py @@ -187,7 +187,7 @@ def test_load_yaml_non_existing_version(tmp_path, caplog): ) with caplog.at_level(logging.WARNING): Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml") - assert "version 1.1.0" in caplog.text + assert "version random" in caplog.text assert f"Haystack {haystack.__version__}" in caplog.text @@ -209,7 +209,7 @@ def test_load_yaml_incompatible_version(tmp_path, caplog): ) with caplog.at_level(logging.WARNING): Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml") - assert "version random" in caplog.text + assert "version 1.1.0" in caplog.text assert f"Haystack {haystack.__version__}" in caplog.text From 9cda077246ebac9bf089df49e710362a8ea67d18 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 6 Apr 2022 15:04:03 +0200 Subject: [PATCH 09/22] Fix generate_json_schema.py --- .github/utils/generate_json_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/utils/generate_json_schema.py b/.github/utils/generate_json_schema.py index 3ecc311cd4..a0b089d5fe 100644 --- a/.github/utils/generate_json_schema.py +++ b/.github/utils/generate_json_schema.py @@ -9,5 +9,5 @@ from haystack.nodes._json_schema import update_json_schema update_json_schema( - update_index=True, destination_path=Path(__file__).parent.parent.parent / "haystack" / "json-schemas" + destination_path=Path(__file__).parent.parent.parent / "haystack" / "json-schemas" ) From 202d7a62789034f1e122ed4d94d7530984740e47 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 6 Apr 2022 13:08:13 +0000 Subject: [PATCH 10/22] Update Documentation & Code Style --- .github/utils/generate_json_schema.py | 4 +--- haystack/nodes/_json_schema.py | 17 ++++------------- haystack/pipelines/config.py | 6 ++++-- test/test_pipeline.py | 2 +- 4 files 
changed, 10 insertions(+), 19 deletions(-) diff --git a/.github/utils/generate_json_schema.py b/.github/utils/generate_json_schema.py index a0b089d5fe..023a16a309 100644 --- a/.github/utils/generate_json_schema.py +++ b/.github/utils/generate_json_schema.py @@ -8,6 +8,4 @@ sys.path.append(".") from haystack.nodes._json_schema import update_json_schema -update_json_schema( - destination_path=Path(__file__).parent.parent.parent / "haystack" / "json-schemas" -) +update_json_schema(destination_path=Path(__file__).parent.parent.parent / "haystack" / "json-schemas") diff --git a/haystack/nodes/_json_schema.py b/haystack/nodes/_json_schema.py index 4ada57db78..9ddf5c08c3 100644 --- a/haystack/nodes/_json_schema.py +++ b/haystack/nodes/_json_schema.py @@ -214,9 +214,7 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[ return component_schema, {"$ref": f"#/definitions/{component_name}"} -def get_json_schema( - filename: str, version: str, modules: List[str] = ["haystack.document_stores", "haystack.nodes"] -): +def get_json_schema(filename: str, version: str, modules: List[str] = ["haystack.document_stores", "haystack.nodes"]): """ Generate JSON schema for Haystack pipelines. """ @@ -320,19 +318,15 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st return schema - def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH): """ - If the version contains "rc", only update the master's schema. + If the version contains "rc", only update the master's schema. Otherwise, create (or update) a new schema. 
""" # Update master's schema filename = f"haystack-pipeline-master.schema.json" with open(destination_path / filename, "w") as json_file: - json.dump(get_json_schema( - filename=filename, - version="master" - ), json_file, indent=2) + json.dump(get_json_schema(filename=filename, version="master"), json_file, indent=2) # If it's not an rc version: if "rc" not in haystack_version: @@ -340,10 +334,7 @@ def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH): # Create/update the specific version file too filename = f"haystack-pipeline-{haystack_version}.schema.json" with open(destination_path / filename, "w") as json_file: - json.dump(get_json_schema( - filename=filename, - version=haystack_version - ), json_file, indent=2) + json.dump(get_json_schema(filename=filename, version=haystack_version), json_file, indent=2) # Update the index index_name = "haystack-pipeline.schema.json" diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index 225d83b70b..4002bebacc 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -167,7 +167,9 @@ def validate_config(pipeline_config: Dict, strict_version: bool = False) -> None :return: None if validation is successful :raise: `PipelineConfigError` in case of issues. """ - config_to_validate = deepcopy(pipeline_config) # version might be deleted. We copy to avoid this change from propagating. + config_to_validate = deepcopy( + pipeline_config + ) # version might be deleted. We copy to avoid this change from propagating. 
validate_config_strings(config_to_validate) @@ -175,7 +177,7 @@ def validate_config(pipeline_config: Dict, strict_version: bool = False) -> None schema = json.load(schema_file) pipeline_version = config_to_validate.get("version", None) - + if not strict_version: schema["required"] = [attribute for attribute in schema["required"] if attribute != "version"] try: diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 5fa2268fed..9109a4a2f7 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -517,7 +517,7 @@ def test_generate_code_is_component_order_invariant(): for components in component_orders: pipeline_config["components"] = components - + code = generate_code(pipeline_config=pipeline_config, pipeline_variable_name="p", generate_imports=False) assert code == expected_code From 575ed868bb9e005ed6728d91c64e8e9dd7d5000b Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 13 Apr 2022 12:06:27 +0200 Subject: [PATCH 11/22] Fix version validation and add tests --- haystack/pipelines/base.py | 20 ++++++++++-- haystack/pipelines/config.py | 61 +++++++++++++++--------------------- test/test_pipeline_yaml.py | 42 ++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 40 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 35cd826d9d..1d4776f41f 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1,4 +1,5 @@ from __future__ import annotations +from email.policy import strict from os import pipe import tempfile from typing import Dict, List, Optional, Any, Set, Tuple, Union @@ -1071,7 +1072,13 @@ def draw(self, path: Path = Path("pipeline.png")): graphviz.draw(path) @classmethod - def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True): + def load_from_yaml( + cls, + path: Path, + pipeline_name: Optional[str] = None, + overwrite_with_env_variables: bool = True, + strict_version_check: bool = False + ): """ Load Pipeline from a YAML 
file defining the individual components and how they're tied together to form a Pipeline. A single YAML can declare multiple Pipelines, in which case an explicit `pipeline_name` must @@ -1116,6 +1123,7 @@ def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwri to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. + :param strict_version_check: whether to fail in case of a version mismatch (throws a warning otherwise) """ pipeline_config = read_pipeline_config_from_yaml(path) @@ -1123,11 +1131,16 @@ def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwri pipeline_config=pipeline_config, pipeline_name=pipeline_name, overwrite_with_env_variables=overwrite_with_env_variables, + strict_version_check=strict_version_check ) @classmethod def load_from_config( - cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True + cls, + pipeline_config: Dict, + pipeline_name: Optional[str] = None, + overwrite_with_env_variables: bool = True, + strict_version_check: bool = False ): """ Load Pipeline from a config dict defining the individual components and how they're tied together to form @@ -1173,8 +1186,9 @@ def load_from_config( to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. + :param strict_version_check: whether to fail in case of a version mismatch (throws a warning otherwise). 
""" - validate_config(pipeline_config) + validate_config(pipeline_config, strict_version_check=strict_version_check) pipeline_definition = get_pipeline_definition(pipeline_config=pipeline_config, pipeline_name=pipeline_name) component_definitions = get_component_definitions( diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index 4002bebacc..bce62e340f 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -158,58 +158,47 @@ def validate_yaml(path: Path, strict_version: bool = False): logging.debug(f"'{path}' contains valid Haystack pipelines.") -def validate_config(pipeline_config: Dict, strict_version: bool = False) -> None: +def validate_config(pipeline_config: Dict, strict_version_check: bool = False) -> None: """ Validates the given configuration using the autogenerated JSON schema. :param pipeline_config: the configuration to validate - :param strict_version: whether to fail in case of a version mismatch (throws a warning otherwise) + :param strict_version_check: whether to fail in case of a version mismatch (throws a warning otherwise) :return: None if validation is successful :raise: `PipelineConfigError` in case of issues. """ - config_to_validate = deepcopy( - pipeline_config - ) # version might be deleted. We copy to avoid this change from propagating. + validate_config_strings(pipeline_config) - validate_config_strings(config_to_validate) + # Check for the version manually (to avoid validation errors) + pipeline_version = pipeline_config.get("version", None) + + if pipeline_version != __version__: + if strict_version_check: + raise PipelineConfigError( + f"Cannot load pipeline configuration of version {pipeline_version} " + f"in Haystack version {__version__}\n" + "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " + "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " + "and fix your configuration accordingly." 
+ ) + logging.warning( + f"This pipeline is version {pipeline_version}, but you're using Haystack {__version__}\n" + "This might cause bugs and unexpected behaviors." + "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " + "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " + "and fix your configuration accordingly." + ) with open(JSON_SCHEMAS_PATH / f"haystack-pipeline-master.schema.json", "r") as schema_file: schema = json.load(schema_file) - pipeline_version = config_to_validate.get("version", None) - - if not strict_version: - schema["required"] = [attribute for attribute in schema["required"] if attribute != "version"] - try: - del config_to_validate["version"] - except KeyError: - raise PipelineConfigError( - "Your pipeline configuration doesn't have a version. " - "If you're loading a pipeline from YAML, make sure to have a top-level parameter " - "called 'version' in your YAML file and specify which Haystack version you're using." - ) + # Remove the version value from the schema to prevent validation errors on it - a version only has to be present. + del schema["properties"]["version"]["const"] loaded_custom_nodes = [] while True: try: - Draft7Validator(schema).validate(instance=config_to_validate) - - if pipeline_version != __version__: - if strict_version: - raise PipelineConfigError( - f"Cannot load pipeline configuration of version {pipeline_version} " - f"in Haystack version {__version__}\n" - "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " - "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " - "and fix your configuration accordingly." - ) - logging.warning( - f"This pipeline is version {pipeline_version}, but you're using Haystack {__version__}\n" - "This might cause bugs and unexpected behaviors." 
- "Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), " - "the documentation (https://haystack.deepset.ai/components/pipelines#yaml-file-definitions) " - "and fix your configuration accordingly." - ) + Draft7Validator(schema).validate(instance=pipeline_config) break except ValidationError as validation: diff --git a/test/test_pipeline_yaml.py b/test/test_pipeline_yaml.py index 5b0a249a46..7c85a0e93c 100644 --- a/test/test_pipeline_yaml.py +++ b/test/test_pipeline_yaml.py @@ -164,7 +164,7 @@ def test_load_yaml_missing_version(tmp_path): - Query """ ) - with pytest.raises(PipelineConfigError) as e: + with pytest.raises(PipelineConfigError, match="Validation failed") as e: Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml") assert "version" in str(e) @@ -191,6 +191,26 @@ def test_load_yaml_non_existing_version(tmp_path, caplog): assert f"Haystack {haystack.__version__}" in caplog.text +def test_load_yaml_non_existing_version_strict(tmp_path): + with open(tmp_path / "tmp_config.yml", "w") as tmp_file: + tmp_file.write( + """ + version: random + components: + - name: docstore + type: MockDocumentStore + pipelines: + - name: my_pipeline + nodes: + - name: docstore + inputs: + - Query + """ + ) + with pytest.raises(PipelineConfigError, match="Cannot load pipeline configuration of version random"): + Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml", strict_version_check=True) + + def test_load_yaml_incompatible_version(tmp_path, caplog): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( @@ -213,6 +233,26 @@ def test_load_yaml_incompatible_version(tmp_path, caplog): assert f"Haystack {haystack.__version__}" in caplog.text +def test_load_yaml_incompatible_version_strict(tmp_path): + with open(tmp_path / "tmp_config.yml", "w") as tmp_file: + tmp_file.write( + """ + version: 1.1.0 + components: + - name: docstore + type: MockDocumentStore + pipelines: + - name: my_pipeline + nodes: + - name: 
docstore + inputs: + - Query + """ + ) + with pytest.raises(PipelineConfigError, match="Cannot load pipeline configuration of version 1.1.0"): + Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml", strict_version_check=True) + + def test_load_yaml_no_components(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( From 260eed6f18689d447a140a4911adfbca7d93db42 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 13 Apr 2022 12:12:56 +0200 Subject: [PATCH 12/22] Fix mypy --- haystack/pipelines/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index b527f623a0..9b327e2caa 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -136,7 +136,7 @@ def to_notebook_cell( @classmethod @abstractmethod def load_from_config( - cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True + cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, strict_version_check: bool = False ): """ Load Pipeline from a config dict defining the individual components and how they're tied together to form @@ -182,6 +182,7 @@ def load_from_config( to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. + :param strict_version_check: whether to fail in case of a version mismatch (throws a warning otherwise) """ raise NotImplementedError("This is an abstract method. 
Use Pipeline or RayPipeline instead.") From 9b09f347e5a5faaaae1e4210d6991981354ef382 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 13 Apr 2022 10:24:59 +0000 Subject: [PATCH 13/22] Update Documentation & Code Style --- docs/_src/api/api/pipelines.md | 9 ++++++--- docs/_src/tutorials/tutorials/11.md | 2 +- haystack/pipelines/base.py | 28 ++++++++++++++++------------ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md index 0cce6fc5d0..a6ca9e770c 100644 --- a/docs/_src/api/api/pipelines.md +++ b/docs/_src/api/api/pipelines.md @@ -83,7 +83,7 @@ Default value is True. ```python @classmethod @abstractmethod -def load_from_config(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True) +def load_from_config(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, strict_version_check: bool = False) ``` Load Pipeline from a config dict defining the individual components and how they're tied together to form @@ -132,6 +132,7 @@ Here's a sample configuration: to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. +- `strict_version_check`: whether to fail in case of a version mismatch (throws a warning otherwise) @@ -561,7 +562,7 @@ Create a Graphviz visualization of the pipeline. 
```python @classmethod -def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True) +def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, strict_version_check: bool = False) ``` Load Pipeline from a YAML file defining the individual components and how they're tied together to form @@ -610,6 +611,7 @@ If the pipeline loads correctly regardless, save again the pipeline using `Pipel to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. +- `strict_version_check`: whether to fail in case of a version mismatch (throws a warning otherwise) @@ -617,7 +619,7 @@ variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an ```python @classmethod -def load_from_config(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True) +def load_from_config(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, strict_version_check: bool = False) ``` Load Pipeline from a config dict defining the individual components and how they're tied together to form @@ -666,6 +668,7 @@ Here's a sample configuration: to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an `_` sign must be used to specify nested hierarchical properties. +- `strict_version_check`: whether to fail in case of a version mismatch (throws a warning otherwise). 
diff --git a/docs/_src/tutorials/tutorials/11.md b/docs/_src/tutorials/tutorials/11.md index c6d65c58dd..3d86bde55c 100644 --- a/docs/_src/tutorials/tutorials/11.md +++ b/docs/_src/tutorials/tutorials/11.md @@ -189,7 +189,7 @@ To find out more about these pipelines, have a look at our [documentation](https With any Pipeline, whether prebuilt or custom constructed, you can save a diagram showing how all the components are connected. -![image](https://user-images.githubusercontent.com/1563902/102451716-54813700-4039-11eb-881e-f3c01b47ca15.png) +![image](https://github.com/deepset-ai/haystack/blob/master/docs/img/retriever-reader-pipeline.png) ```python diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 9b327e2caa..0d55dcb326 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -136,7 +136,11 @@ def to_notebook_cell( @classmethod @abstractmethod def load_from_config( - cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, strict_version_check: bool = False + cls, + pipeline_config: Dict, + pipeline_name: Optional[str] = None, + overwrite_with_env_variables: bool = True, + strict_version_check: bool = False, ): """ Load Pipeline from a config dict defining the individual components and how they're tied together to form @@ -1068,11 +1072,11 @@ def draw(self, path: Path = Path("pipeline.png")): @classmethod def load_from_yaml( - cls, - path: Path, - pipeline_name: Optional[str] = None, - overwrite_with_env_variables: bool = True, - strict_version_check: bool = False + cls, + path: Path, + pipeline_name: Optional[str] = None, + overwrite_with_env_variables: bool = True, + strict_version_check: bool = False, ): """ Load Pipeline from a YAML file defining the individual components and how they're tied together to form @@ -1126,16 +1130,16 @@ def load_from_yaml( pipeline_config=pipeline_config, pipeline_name=pipeline_name, 
overwrite_with_env_variables=overwrite_with_env_variables, - strict_version_check=strict_version_check + strict_version_check=strict_version_check, ) @classmethod def load_from_config( - cls, - pipeline_config: Dict, - pipeline_name: Optional[str] = None, - overwrite_with_env_variables: bool = True, - strict_version_check: bool = False + cls, + pipeline_config: Dict, + pipeline_name: Optional[str] = None, + overwrite_with_env_variables: bool = True, + strict_version_check: bool = False, ): """ Load Pipeline from a config dict defining the individual components and how they're tied together to form From c7ad0827daadc8d4b41f674978196dfff20350e1 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 13 Apr 2022 13:45:32 +0200 Subject: [PATCH 14/22] Rename master into ignore --- .../haystack-pipeline-master.schema.json | 2 +- .../haystack-pipeline.schema.json | 2 +- haystack/nodes/_json_schema.py | 6 +-- .../pipeline_empty.haystack-pipeline.yml | 2 +- .../pipeline/pipelines.haystack-pipeline.yml | 2 +- .../pipelines_dpr.haystack-pipeline.yml | 2 +- test/samples/pipeline/test_pipeline.yaml | 2 +- .../test_pipeline_faiss_indexing.yaml | 2 +- .../test_pipeline_faiss_retrieval.yaml | 2 +- .../test_pipeline_tfidfretriever.yaml | 2 +- test/samples/pipeline/test_ray_pipeline.yaml | 2 +- test/test_pipeline_yaml.py | 42 +++++++++---------- 12 files changed, 34 insertions(+), 34 deletions(-) diff --git a/haystack/json-schemas/haystack-pipeline-master.schema.json b/haystack/json-schemas/haystack-pipeline-master.schema.json index 96643a2011..af2b4ef74c 100644 --- a/haystack/json-schemas/haystack-pipeline-master.schema.json +++ b/haystack/json-schemas/haystack-pipeline-master.schema.json @@ -9,7 +9,7 @@ "title": "Version", "description": "Version of the Haystack Pipeline file.", "type": "string", - "const": "master" + "const": "ignore" }, "components": { "title": "Components", diff --git a/haystack/json-schemas/haystack-pipeline.schema.json 
b/haystack/json-schemas/haystack-pipeline.schema.json index dc3710f0f3..4ae84b3e8a 100644 --- a/haystack/json-schemas/haystack-pipeline.schema.json +++ b/haystack/json-schemas/haystack-pipeline.schema.json @@ -10,7 +10,7 @@ { "properties": { "version": { - "const": "master" + "const": "ignore" } } }, diff --git a/haystack/nodes/_json_schema.py b/haystack/nodes/_json_schema.py index e917253319..cee8e0c3e7 100644 --- a/haystack/nodes/_json_schema.py +++ b/haystack/nodes/_json_schema.py @@ -319,13 +319,13 @@ def inject_definition_in_schema(node_class: Type[BaseComponent], schema: Dict[st def update_json_schema(destination_path: Path = JSON_SCHEMAS_PATH): """ - If the version contains "rc", only update the master's schema. + If the version contains "rc", only update master's schema. Otherwise, create (or update) a new schema. """ - # Update master's schema + # Update master's schema filename = f"haystack-pipeline-master.schema.json" with open(destination_path / filename, "w") as json_file: - json.dump(get_json_schema(filename=filename, version="master"), json_file, indent=2) + json.dump(get_json_schema(filename=filename, version="ignore"), json_file, indent=2) # If it's not an rc version: if "rc" not in haystack_version: diff --git a/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml b/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml index f174283b80..ba4882666f 100644 --- a/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml +++ b/rest_api/pipeline/pipeline_empty.haystack-pipeline.yml @@ -1,5 +1,5 @@ # Dummy pipeline, used when the CI needs to load the REST API to extract the OpenAPI specs. DO NOT USE. 
-version: 'master' +version: ignore components: - name: FileTypeClassifier diff --git a/rest_api/pipeline/pipelines.haystack-pipeline.yml b/rest_api/pipeline/pipelines.haystack-pipeline.yml index af436d7f12..a65fc0a7cc 100644 --- a/rest_api/pipeline/pipelines.haystack-pipeline.yml +++ b/rest_api/pipeline/pipelines.haystack-pipeline.yml @@ -1,6 +1,6 @@ # To allow your IDE to autocomplete and validate your YAML pipelines, name them as .haystack-pipeline.yml -version: 'master' +version: ignore components: # define all the building-blocks for Pipeline - name: DocumentStore diff --git a/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml b/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml index 79702f8698..13395f691f 100644 --- a/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml +++ b/rest_api/pipeline/pipelines_dpr.haystack-pipeline.yml @@ -1,6 +1,6 @@ # To allow your IDE to autocomplete and validate your YAML pipelines, name them as .haystack-pipeline.yml -version: 'master' +version: ignore components: # define all the building-blocks for Pipeline - name: DocumentStore diff --git a/test/samples/pipeline/test_pipeline.yaml b/test/samples/pipeline/test_pipeline.yaml index 4eee9cfeee..4dea24273f 100644 --- a/test/samples/pipeline/test_pipeline.yaml +++ b/test/samples/pipeline/test_pipeline.yaml @@ -1,4 +1,4 @@ -version: 'master' +version: ignore components: - name: Reader diff --git a/test/samples/pipeline/test_pipeline_faiss_indexing.yaml b/test/samples/pipeline/test_pipeline_faiss_indexing.yaml index 1584474fe7..db2f83a0db 100644 --- a/test/samples/pipeline/test_pipeline_faiss_indexing.yaml +++ b/test/samples/pipeline/test_pipeline_faiss_indexing.yaml @@ -1,4 +1,4 @@ -version: 'master' +version: ignore components: - name: DPRRetriever diff --git a/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml b/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml index 75eb784f75..462826b923 100644 --- a/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml 
+++ b/test/samples/pipeline/test_pipeline_faiss_retrieval.yaml @@ -1,4 +1,4 @@ -version: 'master' +version: ignore components: - name: DPRRetriever diff --git a/test/samples/pipeline/test_pipeline_tfidfretriever.yaml b/test/samples/pipeline/test_pipeline_tfidfretriever.yaml index ae0d9530ac..3bc0e1103e 100644 --- a/test/samples/pipeline/test_pipeline_tfidfretriever.yaml +++ b/test/samples/pipeline/test_pipeline_tfidfretriever.yaml @@ -1,4 +1,4 @@ -version: 'master' +version: ignore components: - name: Reader diff --git a/test/samples/pipeline/test_ray_pipeline.yaml b/test/samples/pipeline/test_ray_pipeline.yaml index 8316dae3ea..95b480fdbb 100644 --- a/test/samples/pipeline/test_ray_pipeline.yaml +++ b/test/samples/pipeline/test_ray_pipeline.yaml @@ -1,4 +1,4 @@ -version: 'master' +version: ignore components: - name: Reader diff --git a/test/test_pipeline_yaml.py b/test/test_pipeline_yaml.py index 7c85a0e93c..af8c01048a 100644 --- a/test/test_pipeline_yaml.py +++ b/test/test_pipeline_yaml.py @@ -114,7 +114,7 @@ def test_load_yaml(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: retriever type: MockRetriever @@ -257,7 +257,7 @@ def test_load_yaml_no_components(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: pipelines: - name: my_pipeline @@ -273,7 +273,7 @@ def test_load_yaml_wrong_component(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: docstore type: ImaginaryDocumentStore @@ -299,7 +299,7 @@ def __init__(self, param: int): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: custom_node type: CustomNode @@ -331,7 +331,7 @@ def abstract_method(self): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: 
tmp_file.write( f""" - version: master + version: ignore components: - name: custom_node type: CustomNode @@ -355,7 +355,7 @@ def __init__(self): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: custom_node type: BaseCustomNode @@ -379,7 +379,7 @@ def run(self, *a, **k): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: custom_node type: SomeCustomNode @@ -411,7 +411,7 @@ def __init__(self, other_node: OtherNode): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: other_node type: OtherNode @@ -455,7 +455,7 @@ def __init__(self, some_exotic_parameter: HelperClass = HelperClass(1)): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: custom_node type: CustomNode @@ -492,7 +492,7 @@ def __init__(self, some_exotic_parameter: HelperClass): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: custom_node type: CustomNode @@ -531,7 +531,7 @@ def __init__(self, some_exotic_parameter: Flags = None): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: custom_node type: CustomNode @@ -568,7 +568,7 @@ def __init__(self, some_exotic_parameter: Flags): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: custom_node type: CustomNode @@ -602,7 +602,7 @@ def __init__(self, some_exotic_parameter: str): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: custom_node type: CustomNode @@ -634,7 +634,7 @@ def __init__(self, 
some_exotic_parameter: str): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: custom_node type: CustomNode @@ -655,7 +655,7 @@ def test_load_yaml_no_pipelines(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: docstore type: MockDocumentStore @@ -671,7 +671,7 @@ def test_load_yaml_invalid_pipeline_name(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: docstore type: MockDocumentStore @@ -692,7 +692,7 @@ def test_load_yaml_pipeline_with_wrong_nodes(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: docstore type: MockDocumentStore @@ -713,7 +713,7 @@ def test_load_yaml_pipeline_not_acyclic_graph(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: retriever type: MockRetriever @@ -740,7 +740,7 @@ def test_load_yaml_wrong_root(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: retriever type: MockRetriever @@ -762,7 +762,7 @@ def test_load_yaml_two_roots(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: retriever type: MockRetriever @@ -788,7 +788,7 @@ def test_load_yaml_disconnected_component(tmp_path): with open(tmp_path / "tmp_config.yml", "w") as tmp_file: tmp_file.write( f""" - version: master + version: ignore components: - name: docstore type: MockDocumentStore From 09caeefff2859d9ad7d4f3d5eb4999c7cf1d8a6c Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 13 Apr 2022 13:54:38 +0200 Subject: [PATCH 15/22] Complete parameter 
rename --- haystack/pipelines/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index 413ad1b0c8..5ea63e3736 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -145,17 +145,17 @@ def build_component_dependency_graph( return graph -def validate_yaml(path: Path, strict_version: bool = False): +def validate_yaml(path: Path, strict_version_check: bool = False): """ Validates the given YAML file using the autogenerated JSON schema. :param pipeline_config: the configuration to validate - :param strict_version: whether to fail in case of a version mismatch (throws a warning otherwise) + :param strict_version_check: whether to fail in case of a version mismatch (throws a warning otherwise) :return: None if validation is successful :raise: `PipelineConfigError` in case of issues. """ pipeline_config = read_pipeline_config_from_yaml(path) - validate_config(pipeline_config=pipeline_config, strict_version=strict_version) + validate_config(pipeline_config=pipeline_config, strict_version_check=strict_version_check) logging.debug(f"'{path}' contains valid Haystack pipelines.") From 3b3d1b0fddb497dca159c77fbb0c77db78b9232f Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 14 Apr 2022 10:43:05 +0200 Subject: [PATCH 16/22] Mypy and pylint --- haystack/pipelines/base.py | 2 ++ haystack/pipelines/config.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 0d55dcb326..3619542344 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1414,6 +1414,7 @@ def load_from_config( pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, + strict_version_check: bool = False, address: Optional[str] = None, **kwargs, ): @@ -1454,6 +1455,7 @@ def load_from_yaml( pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = 
True, address: Optional[str] = None, + strict_version_check: bool = False, **kwargs, ): """ diff --git a/haystack/pipelines/config.py b/haystack/pipelines/config.py index 5ea63e3736..f34bfc0a15 100644 --- a/haystack/pipelines/config.py +++ b/haystack/pipelines/config.py @@ -5,7 +5,6 @@ import copy import json import logging -from copy import deepcopy from pathlib import Path import yaml From 86bbc23cf37e300e2925b0a0c5bb6bf285346629 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 14 Apr 2022 08:46:29 +0000 Subject: [PATCH 17/22] Update Documentation & Code Style --- docs/_src/api/api/pipelines.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md index a6ca9e770c..1e44e16482 100644 --- a/docs/_src/api/api/pipelines.md +++ b/docs/_src/api/api/pipelines.md @@ -772,7 +772,7 @@ def __init__(address: str = None, **kwargs) ```python @classmethod -def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, address: Optional[str] = None, **kwargs, ,) +def load_from_yaml(cls, path: Path, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True, address: Optional[str] = None, strict_version_check: bool = False, **kwargs, ,) ``` Load Pipeline from a YAML file defining the individual components and how they're tied together to form From 3daa7fa2fffb1e0647c3d7bbecf96060c59654f3 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 14 Apr 2022 13:36:15 +0200 Subject: [PATCH 18/22] mypy --- haystack/pipelines/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index bceaef552d..909351791d 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1457,7 +1457,7 @@ def load_from_yaml( address: Optional[str] = None, strict_version_check: bool = False, **kwargs, - ): + 
): # type: ignore """ Load Pipeline from a YAML file defining the individual components and how they're tied together to form a Pipeline. A single YAML can declare multiple Pipelines, in which case an explicit `pipeline_name` must From 048266aa0fd950148d156219a0486ed800e94544 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 14 Apr 2022 11:38:35 +0000 Subject: [PATCH 19/22] Update Documentation & Code Style --- haystack/pipelines/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 909351791d..0c3538d7ac 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1457,7 +1457,7 @@ def load_from_yaml( address: Optional[str] = None, strict_version_check: bool = False, **kwargs, - ): # type: ignore + ): # type: ignore """ Load Pipeline from a YAML file defining the individual components and how they're tied together to form a Pipeline. A single YAML can declare multiple Pipelines, in which case an explicit `pipeline_name` must From 516c4558387e8f8425a52aa8962b4abf4bc2e899 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Thu, 14 Apr 2022 14:08:33 +0200 Subject: [PATCH 20/22] mypy --- haystack/pipelines/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 909351791d..0408939ddf 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1449,7 +1449,7 @@ def load_from_config( return pipeline @classmethod - def load_from_yaml( + def load_from_yaml( # type: ignore cls, path: Path, pipeline_name: Optional[str] = None, @@ -1457,7 +1457,7 @@ def load_from_yaml( address: Optional[str] = None, strict_version_check: bool = False, **kwargs, - ): # type: ignore + ): """ Load Pipeline from a YAML file defining the individual components and how they're tied together to form a Pipeline. 
A single YAML can declare multiple Pipelines, in which case an explicit `pipeline_name` must From fcaa4995848e85d507ea6780ed5dc2645672840c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 14 Apr 2022 12:21:24 +0000 Subject: [PATCH 21/22] Update Documentation & Code Style --- haystack/pipelines/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 0408939ddf..70963bbacf 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1449,7 +1449,7 @@ def load_from_config( return pipeline @classmethod - def load_from_yaml( # type: ignore + def load_from_yaml( # type: ignore cls, path: Path, pipeline_name: Optional[str] = None, From dd274fbd94c82780aa74a2eeef9805f018e4285c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 19 Apr 2022 12:47:45 +0000 Subject: [PATCH 22/22] Update Documentation & Code Style --- docs/_src/tutorials/tutorials/8.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/_src/tutorials/tutorials/8.md b/docs/_src/tutorials/tutorials/8.md index 1480272354..5e56342818 100644 --- a/docs/_src/tutorials/tutorials/8.md +++ b/docs/_src/tutorials/tutorials/8.md @@ -37,7 +37,7 @@ This tutorial will show you all the tools that Haystack provides to help you cas !pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab,ocr] # For Colab/linux based machines -!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz +!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz !tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin # For Macos machines