Skip to content

Commit

Permalink
feat!: Add extra for inference dependencies such as torch (#5147)
Browse files Browse the repository at this point in the history
* feat!: add extra for inference dependencies such as torch

* add inference extra to 'all' and 'all-gpu' extra

* install inference extra in selected integration tests

* import LazyImport

* review feedback

* add import error messages and update readme

* remove extra dot
  • Loading branch information
julian-risch authored Jun 20, 2023
1 parent 916e845 commit 30fdf2b
Show file tree
Hide file tree
Showing 29 changed files with 49 additions and 38 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ jobs:
python-version: ${{ env.PYTHON_VERSION }}

- name: Install Haystack
run: pip install .[elasticsearch,dev,preprocessing]
run: pip install .[elasticsearch,dev,preprocessing,inference]

- name: Run tests
run: |
Expand Down Expand Up @@ -608,7 +608,7 @@ jobs:
python-version: ${{ env.PYTHON_VERSION }}

- name: Install Haystack
run: pip install .[dev,preprocessing]
run: pip install .[dev,preprocessing,inference]

- name: Run tests
run: |
Expand Down Expand Up @@ -662,7 +662,7 @@ jobs:
python-version: ${{ env.PYTHON_VERSION }}

- name: Install Haystack
run: pip install .[dev,preprocessing]
run: pip install .[dev,preprocessing,inference]

- name: Run tests
run: |
Expand Down Expand Up @@ -716,7 +716,7 @@ jobs:
python-version: ${{ env.PYTHON_VERSION }}

- name: Install Haystack
run: pip install .[dev,preprocessing]
run: pip install .[dev,preprocessing,inference]

- name: Run tests
run: |
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,18 @@ This command installs everything needed for basic Pipelines that use an in-memor

**Full Installation**

To use more advanced features, like certain DocumentStores, FileConverters, OCR, or Ray,
To use more advanced features, like certain DocumentStores, FileConverters, OCR, local inference with PyTorch, or Ray,
you need to install further dependencies. The following command installs the [latest release](https://github.com/deepset-ai/haystack/releases) of Haystack and all its dependencies:

```sh
pip install 'farm-haystack[all]' ## or 'all-gpu' for the GPU-enabled dependencies
```

If you only want to install the dependencies needed to run model inference on your local hardware rather than through remote API endpoints (for example, torch and sentence-transformers), use the following command:
```sh
pip install 'farm-haystack[inference]' ## installs torch, sentence-transformers, sentencepiece, and huggingface-hub
```

If you want to try out the newest features that are not in an official release yet, you can install the unstable version from the main branch with the following command:

```sh
Expand Down
3 changes: 2 additions & 1 deletion haystack/document_stores/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
import torch
from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports

Expand Down Expand Up @@ -92,6 +92,7 @@ def __init__(
You can learn more about these parameters by visiting https://github.com/dorianbrown/rank_bm25
By default, no parameters are set.
"""
torch_import.check()
if bm25_parameters is None:
bm25_parameters = {}
super().__init__()
Expand Down
2 changes: 1 addition & 1 deletion haystack/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from haystack import __version__
from haystack.lazy_imports import LazyImport

with LazyImport() as torch_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
import torch

with LazyImport() as transformers_import:
Expand Down
2 changes: 1 addition & 1 deletion haystack/modeling/model/feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
SPECIAL_TOKENIZER_CHARS = r"^(##|Ġ|▁)"


with LazyImport(message="Run 'pip install transformers[torch,sentencepiece]'.") as transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as transformers_import:
import transformers
from transformers import PreTrainedTokenizer, RobertaTokenizer, AutoConfig, AutoFeatureExtractor, AutoTokenizer

Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/answer_generator/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import (
RagTokenizer,
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/audio/whisper_transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from haystack.lazy_imports import LazyImport


with LazyImport() as torch_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
import torch


Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/doc_language_classifier/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import pipeline
from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/document_classifier/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from haystack.nodes.document_classifier.base import BaseDocumentClassifier
from haystack.lazy_imports import LazyImport

with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import pipeline
from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/extractor/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForTokenClassification
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/image_to_text/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import pipeline
from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/label_generator/pseudo_label_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from sentence_transformers import CrossEncoder
from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/prompt/invocation_layer/hugging_face.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import (
pipeline,
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/prompt/prompt_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from haystack.schema import Document, MultiLabel
from haystack.lazy_imports import LazyImport

with LazyImport() as torch_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
import torch


Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/prompt/prompt_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from haystack.nodes.prompt.prompt_template import PromptTemplate
from haystack.lazy_imports import LazyImport

with LazyImport() as torch_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import:
import torch


Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/query_classifier/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import pipeline
from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports
Expand Down
3 changes: 2 additions & 1 deletion haystack/nodes/question_generator/question_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
Expand Down Expand Up @@ -93,6 +93,7 @@ def __init__(
parameter is not used and a single CPU device is used for inference.
"""
torch_and_transformers_import.check()
super().__init__()
self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
if len(self.devices) > 1:
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/ranker/sentence_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from torch.nn import DataParallel
from transformers import AutoModelForSequenceClassification, AutoTokenizer
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/reader/farm.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from haystack.modeling.data_handler.data_silo import DataSilo, DistillationDataSilo
from haystack.modeling.data_handler.processor import SquadProcessor, Processor
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/reader/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

TableQuestionAnsweringPipeline = object
TapasPreTrainedModel = object
with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import ( # type: ignore
TapasTokenizer,
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/reader/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import pipeline
from transformers.data.processors.squad import SquadExample
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/retriever/_embedding_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from sentence_transformers import InputExample, SentenceTransformer
from torch.utils.data import DataLoader
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/retriever/dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from torch.nn import DataParallel
from torch.utils.data.sampler import SequentialSampler
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/retriever/multimodal/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from haystack.utils.torch_utils import get_devices # pylint: disable=ungrouped-imports
from haystack.modeling.model.multimodal import get_model # pylint: disable=ungrouped-imports
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/retriever/multimodal/retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch


Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/sampler/top_p_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from sentence_transformers import CrossEncoder
from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/summarizer/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import pipeline
from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports
Expand Down
2 changes: 1 addition & 1 deletion haystack/nodes/translator/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
logger = logging.getLogger(__name__)


with LazyImport() as torch_and_transformers_import:
with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import:
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports
Expand Down
18 changes: 11 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ classifiers = [
dependencies = [
"requests",
"pydantic",
"transformers[torch,sentencepiece]==4.30.1",
"transformers==4.30.1",
"pandas",
"rank_bm25",
"scikit-learn>=1.0.0", # TF-IDF, SklearnQueryClassifier and metrics
Expand All @@ -62,16 +62,15 @@ dependencies = [
"quantulum3", # quantities extraction from text
"posthog", # telemetry
# audio's espnet-model-zoo requires huggingface-hub version <0.8 while we need >=0.5 to be able to use create_repo in FARMReader
"huggingface-hub>=0.5.0",
"tenacity", # retry decorator
"sseclient-py", # server side events for OpenAI streaming
"more_itertools", # utilities

# Web Retriever
"boilerpy3",

# See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
"sentence-transformers>=2.2.0",
# Multimodal Embedder haystack/nodes/retriever/multimodal/embedder.py
"Pillow",

# OpenAI tokenizer
"tiktoken>=0.3.2",
Expand All @@ -89,6 +88,11 @@ dependencies = [
]

[project.optional-dependencies]
inference = [
"transformers[torch,sentencepiece]==4.30.1",
"sentence-transformers>=2.2.0", # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
"huggingface-hub>=0.5.0",
]
elasticsearch = [
"elasticsearch>=7.17,<8",
]
Expand Down Expand Up @@ -212,11 +216,11 @@ formatting = [
]

all = [
"farm-haystack[docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics]",
"farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics]",
]
all-gpu = [
# beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71
"farm-haystack[docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics]",
"farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics]",
]

[project.scripts]
Expand Down

0 comments on commit 30fdf2b

Please sign in to comment.