Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix building Pipeline with YAML #800

Merged
merged 2 commits into from
Feb 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/_src/api/api/document_store.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ Return a summary of the documents in the document store
#### update\_embeddings

```python
| update_embeddings(retriever: BaseRetriever, index: Optional[str] = None, batch_size: int = 10_000)
| update_embeddings(retriever, index: Optional[str] = None, batch_size: int = 10_000)
```

Updates the embeddings in the document store using the encoding model specified in the retriever.
Expand Down
5 changes: 5 additions & 0 deletions haystack/document_store/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
from haystack.document_store.faiss import FAISSDocumentStore
from haystack.document_store.memory import InMemoryDocumentStore
from haystack.document_store.milvus import MilvusDocumentStore
from haystack.document_store.sql import SQLDocumentStore
3 changes: 1 addition & 2 deletions haystack/document_store/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

from haystack.document_store.base import BaseDocumentStore
from haystack import Document, Label
from haystack.retriever.base import BaseRetriever
from haystack.utils import get_batches_from_generator

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -755,7 +754,7 @@ def describe_documents(self, index=None):
}
return stats

def update_embeddings(self, retriever: BaseRetriever, index: Optional[str] = None, batch_size: int = 10_000):
def update_embeddings(self, retriever, index: Optional[str] = None, batch_size: int = 10_000):
tholor marked this conversation as resolved.
Show resolved Hide resolved
"""
Updates the embeddings in the document store using the encoding model specified in the retriever.
This can be useful if you want to add or change the embeddings for your documents (e.g. after changing the retriever config).
Expand Down
4 changes: 4 additions & 0 deletions haystack/file_converter/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from haystack.file_converter.docx import DocxToTextConverter
from haystack.file_converter.pdf import PDFToTextConverter
from haystack.file_converter.tika import TikaConverter
from haystack.file_converter.txt import TextConverter
1 change: 1 addition & 0 deletions haystack/generator/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from haystack.generator.transformers import RAGenerator
17 changes: 2 additions & 15 deletions haystack/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import os
from copy import deepcopy
from pathlib import Path
from typing import List, Optional, Dict, Type
from typing import List, Optional, Dict

import networkx as nx
import yaml
from networkx import DiGraph
from networkx.drawing.nx_agraph import to_agraph

from haystack import BaseComponent
from haystack.document_store.base import BaseDocumentStore
from haystack.generator.base import BaseGenerator
from haystack.reader.base import BaseReader
from haystack.retriever.base import BaseRetriever
Expand Down Expand Up @@ -240,19 +239,7 @@ def _load_or_get_component(cls, name: str, definitions: dict, components: dict):
cls._load_or_get_component(name=value, definitions=definitions, components=components)
component_params[key] = components[value] # substitute reference (string) with the component object.

if "DocumentStore" in component_type:
ComponentClass: Type[BaseComponent] = BaseDocumentStore
elif "Reader" in component_type:
ComponentClass = BaseReader
elif "Retriever" in component_type:
ComponentClass = BaseRetriever
elif "Generator" in component_type:
ComponentClass = BaseGenerator
elif "Summarizer" in component_type:
ComponentClass = BaseSummarizer
else:
raise NotImplementedError(f"Component of type '{component_type}' is not implemented for pipelines.")
instance = ComponentClass.load_from_args(component_type=component_type, **component_params)
instance = BaseComponent.load_from_args(component_type=component_type, **component_params)
components[name] = instance
return instance

Expand Down
1 change: 1 addition & 0 deletions haystack/preprocessor/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from haystack.preprocessor.preprocessor import PreProcessor
2 changes: 2 additions & 0 deletions haystack/reader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from haystack.reader.farm import FARMReader
from haystack.reader.transformers import TransformersReader
2 changes: 2 additions & 0 deletions haystack/retriever/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from haystack.retriever.dense import DensePassageRetriever, EmbeddingRetriever
from haystack.retriever.sparse import ElasticsearchRetriever
2 changes: 2 additions & 0 deletions haystack/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,5 +233,7 @@ def load_from_args(cls, component_type: str, **kwargs):
:param component_type: name of the component class to load.
:param kwargs: parameters to pass to the __init__() for the component.
"""
if component_type not in cls.subclasses.keys():
raise Exception(f"Haystack component with the name '{component_type}' does not exist.")
instance = cls.subclasses[component_type](**kwargs)
return instance
1 change: 1 addition & 0 deletions haystack/summarizer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from haystack.summarizer.transformers import TransformersSummarizer