From 30fdf2b5dfa3ef9af1dbf5e40747ce5e1364e112 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 20 Jun 2023 09:54:10 +0200 Subject: [PATCH] feat!: Add extra for inference dependencies such as torch (#5147) * feat!: add extra for inference dependencies such as torch * add inference extra to 'all' and 'all-gpu' extra * install inference extra in selected integration tests * import LazyImport * review feedback * add import error messages and update readme * remove extra dot --- .github/workflows/tests.yml | 8 ++++---- README.md | 7 ++++++- haystack/document_stores/memory.py | 3 ++- haystack/environment.py | 2 +- haystack/modeling/model/feature_extraction.py | 2 +- .../nodes/answer_generator/transformers.py | 2 +- haystack/nodes/audio/whisper_transcriber.py | 2 +- .../doc_language_classifier/transformers.py | 2 +- .../nodes/document_classifier/transformers.py | 2 +- haystack/nodes/extractor/entity.py | 2 +- haystack/nodes/image_to_text/transformers.py | 2 +- .../label_generator/pseudo_label_generator.py | 2 +- .../prompt/invocation_layer/hugging_face.py | 2 +- haystack/nodes/prompt/prompt_model.py | 2 +- haystack/nodes/prompt/prompt_node.py | 2 +- .../nodes/query_classifier/transformers.py | 2 +- .../question_generator/question_generator.py | 3 ++- haystack/nodes/ranker/sentence_transformers.py | 2 +- haystack/nodes/reader/farm.py | 2 +- haystack/nodes/reader/table.py | 2 +- haystack/nodes/reader/transformers.py | 2 +- haystack/nodes/retriever/_embedding_encoder.py | 2 +- haystack/nodes/retriever/dense.py | 2 +- .../nodes/retriever/multimodal/embedder.py | 2 +- .../nodes/retriever/multimodal/retriever.py | 2 +- haystack/nodes/sampler/top_p_sampler.py | 2 +- haystack/nodes/summarizer/transformers.py | 2 +- haystack/nodes/translator/transformers.py | 2 +- pyproject.toml | 18 +++++++++++------- 29 files changed, 49 insertions(+), 38 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 49e30ba1ae..658b97f870 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -210,7 +210,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[elasticsearch,dev,preprocessing] + run: pip install .[elasticsearch,dev,preprocessing,inference] - name: Run tests run: | @@ -608,7 +608,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preprocessing] + run: pip install .[dev,preprocessing,inference] - name: Run tests run: | @@ -662,7 +662,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preprocessing] + run: pip install .[dev,preprocessing,inference] - name: Run tests run: | @@ -716,7 +716,7 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: Install Haystack - run: pip install .[dev,preprocessing] + run: pip install .[dev,preprocessing,inference] - name: Run tests run: | diff --git a/README.md b/README.md index a71e00a89d..c722f01afc 100644 --- a/README.md +++ b/README.md @@ -76,13 +76,18 @@ This command installs everything needed for basic Pipelines that use an in-memor **Full Installation** -To use more advanced features, like certain DocumentStores, FileConverters, OCR, or Ray, +To use more advanced features, like certain DocumentStores, FileConverters, OCR, local inference with pytorch, or Ray, you need to install further dependencies. The following command installs the [latest release](https://github.com/deepset-ai/haystack/releases) of Haystack and all its dependencies: ```sh pip install 'farm-haystack[all]' ## or 'all-gpu' for the GPU-enabled dependencies ``` +If you want to install only the dependencies needed for model inference on your local hardware (not remote API endpoints), such as torch and sentence-transformers, you can use the following command: +```sh +pip install 'farm-haystack[inference]' ## installs torch, sentence-transformers, sentencepiece, and huggingface-hub +``` + If you want to try out the newest features that are not in an official release yet, you can install the unstable version from the main branch with the following command: ```sh diff --git a/haystack/document_stores/memory.py b/haystack/document_stores/memory.py index 0482f40225..9100538e33 100644 --- a/haystack/document_stores/memory.py +++ b/haystack/document_stores/memory.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import: import torch from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports @@ -92,6 +92,7 @@ def __init__( You can learn more about these parameters by visiting https://github.com/dorianbrown/rank_bm25 By default, no parameters are set. """ + torch_import.check() if bm25_parameters is None: bm25_parameters = {} super().__init__() diff --git a/haystack/environment.py b/haystack/environment.py index 3552d0ced6..20d63c31f6 100644 --- a/haystack/environment.py +++ b/haystack/environment.py @@ -8,7 +8,7 @@ from haystack import __version__ from haystack.lazy_imports import LazyImport -with LazyImport() as torch_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import: import torch with LazyImport() as transformers_import: diff --git a/haystack/modeling/model/feature_extraction.py b/haystack/modeling/model/feature_extraction.py index 3382d58a9b..2ce31175ac 100644 --- a/haystack/modeling/model/feature_extraction.py +++ b/haystack/modeling/model/feature_extraction.py @@ -32,7 +32,7 @@ SPECIAL_TOKENIZER_CHARS = r"^(##|Ġ|▁)" -with LazyImport(message="Run 'pip install transformers[torch,sentencepiece]'.") as transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as transformers_import: import transformers from transformers import PreTrainedTokenizer, RobertaTokenizer, AutoConfig, AutoFeatureExtractor, AutoTokenizer diff --git a/haystack/nodes/answer_generator/transformers.py b/haystack/nodes/answer_generator/transformers.py index 08fd9f7671..f41d5db9b1 100644 --- a/haystack/nodes/answer_generator/transformers.py +++ b/haystack/nodes/answer_generator/transformers.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import ( RagTokenizer, diff --git a/haystack/nodes/audio/whisper_transcriber.py b/haystack/nodes/audio/whisper_transcriber.py index c2c8a67924..29de05041f 100644 --- a/haystack/nodes/audio/whisper_transcriber.py +++ b/haystack/nodes/audio/whisper_transcriber.py @@ -11,7 +11,7 @@ from haystack.lazy_imports import LazyImport -with LazyImport() as torch_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import: import torch diff --git a/haystack/nodes/doc_language_classifier/transformers.py b/haystack/nodes/doc_language_classifier/transformers.py index 01046370ad..4a91a54a42 100644 --- a/haystack/nodes/doc_language_classifier/transformers.py +++ b/haystack/nodes/doc_language_classifier/transformers.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import pipeline from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports diff --git a/haystack/nodes/document_classifier/transformers.py b/haystack/nodes/document_classifier/transformers.py index 461be3942d..497fcbb1b8 100644 --- a/haystack/nodes/document_classifier/transformers.py +++ b/haystack/nodes/document_classifier/transformers.py @@ -8,7 +8,7 @@ from haystack.nodes.document_classifier.base import BaseDocumentClassifier from haystack.lazy_imports import LazyImport -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import pipeline from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports diff --git a/haystack/nodes/extractor/entity.py b/haystack/nodes/extractor/entity.py index a85662e8d8..14e9a4886b 100644 --- a/haystack/nodes/extractor/entity.py +++ b/haystack/nodes/extractor/entity.py @@ -33,7 +33,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from torch.utils.data import Dataset, DataLoader from transformers import AutoTokenizer, AutoModelForTokenClassification diff --git a/haystack/nodes/image_to_text/transformers.py b/haystack/nodes/image_to_text/transformers.py index 4eba37e3dd..f2ea333b7a 100644 --- a/haystack/nodes/image_to_text/transformers.py +++ b/haystack/nodes/image_to_text/transformers.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import pipeline from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports diff --git a/haystack/nodes/label_generator/pseudo_label_generator.py b/haystack/nodes/label_generator/pseudo_label_generator.py index 1730dfe13d..f6af184fd0 100644 --- a/haystack/nodes/label_generator/pseudo_label_generator.py +++ b/haystack/nodes/label_generator/pseudo_label_generator.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from sentence_transformers import CrossEncoder from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports diff --git a/haystack/nodes/prompt/invocation_layer/hugging_face.py b/haystack/nodes/prompt/invocation_layer/hugging_face.py index 6bdef60d4b..f7728b53fc 100644 --- a/haystack/nodes/prompt/invocation_layer/hugging_face.py +++ b/haystack/nodes/prompt/invocation_layer/hugging_face.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import ( pipeline, diff --git a/haystack/nodes/prompt/prompt_model.py b/haystack/nodes/prompt/prompt_model.py index 9e57146b4a..7c750e1432 100644 --- a/haystack/nodes/prompt/prompt_model.py +++ b/haystack/nodes/prompt/prompt_model.py @@ -7,7 +7,7 @@ from haystack.schema import Document, MultiLabel from haystack.lazy_imports import LazyImport -with LazyImport() as torch_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import: import torch diff --git a/haystack/nodes/prompt/prompt_node.py b/haystack/nodes/prompt/prompt_node.py index da58a3cbd2..a5e8be2f9b 100644 --- a/haystack/nodes/prompt/prompt_node.py +++ b/haystack/nodes/prompt/prompt_node.py @@ -10,7 +10,7 @@ from haystack.nodes.prompt.prompt_template import PromptTemplate from haystack.lazy_imports import LazyImport -with LazyImport() as torch_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_import: import torch diff --git a/haystack/nodes/query_classifier/transformers.py b/haystack/nodes/query_classifier/transformers.py index 8ef6d97c5e..ee0079ffbc 100644 --- a/haystack/nodes/query_classifier/transformers.py +++ b/haystack/nodes/query_classifier/transformers.py @@ -11,7 +11,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import pipeline from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports diff --git a/haystack/nodes/question_generator/question_generator.py b/haystack/nodes/question_generator/question_generator.py index 6d5d388cc5..9da22ff227 100644 --- a/haystack/nodes/question_generator/question_generator.py +++ b/haystack/nodes/question_generator/question_generator.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import AutoModelForSeq2SeqLM from transformers import AutoTokenizer @@ -93,6 +93,7 @@ def __init__( parameter is not used and a single CPU device is used for inference. """ + torch_and_transformers_import.check() super().__init__() self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False) if len(self.devices) > 1: diff --git a/haystack/nodes/ranker/sentence_transformers.py b/haystack/nodes/ranker/sentence_transformers.py index 616d3a0bc5..7eae2a3c5a 100644 --- a/haystack/nodes/ranker/sentence_transformers.py +++ b/haystack/nodes/ranker/sentence_transformers.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from torch.nn import DataParallel from transformers import AutoModelForSequenceClassification, AutoTokenizer diff --git a/haystack/nodes/reader/farm.py b/haystack/nodes/reader/farm.py index 85cc7a16cc..906b665970 100644 --- a/haystack/nodes/reader/farm.py +++ b/haystack/nodes/reader/farm.py @@ -24,7 +24,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from haystack.modeling.data_handler.data_silo import DataSilo, DistillationDataSilo from haystack.modeling.data_handler.processor import SquadProcessor, Processor diff --git a/haystack/nodes/reader/table.py b/haystack/nodes/reader/table.py index f3bc598f1b..5859ab3ccb 100644 --- a/haystack/nodes/reader/table.py +++ b/haystack/nodes/reader/table.py @@ -18,7 +18,7 @@ TableQuestionAnsweringPipeline = object TapasPreTrainedModel = object -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import ( # type: ignore TapasTokenizer, diff --git a/haystack/nodes/reader/transformers.py b/haystack/nodes/reader/transformers.py index 538bd3ad42..9ec8644629 100644 --- a/haystack/nodes/reader/transformers.py +++ b/haystack/nodes/reader/transformers.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import pipeline from transformers.data.processors.squad import SquadExample diff --git a/haystack/nodes/retriever/_embedding_encoder.py b/haystack/nodes/retriever/_embedding_encoder.py index a40d11454d..b1df107a97 100644 --- a/haystack/nodes/retriever/_embedding_encoder.py +++ b/haystack/nodes/retriever/_embedding_encoder.py @@ -31,7 +31,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from sentence_transformers import InputExample, SentenceTransformer from torch.utils.data import DataLoader diff --git a/haystack/nodes/retriever/dense.py b/haystack/nodes/retriever/dense.py index b24ae225ef..ac2223b62e 100644 --- a/haystack/nodes/retriever/dense.py +++ b/haystack/nodes/retriever/dense.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from torch.nn import DataParallel from torch.utils.data.sampler import SequentialSampler diff --git a/haystack/nodes/retriever/multimodal/embedder.py b/haystack/nodes/retriever/multimodal/embedder.py index 528b7e5a42..61f2c756dd 100644 --- a/haystack/nodes/retriever/multimodal/embedder.py +++ b/haystack/nodes/retriever/multimodal/embedder.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from haystack.utils.torch_utils import get_devices # pylint: disable=ungrouped-imports from haystack.modeling.model.multimodal import get_model # pylint: disable=ungrouped-imports diff --git a/haystack/nodes/retriever/multimodal/retriever.py b/haystack/nodes/retriever/multimodal/retriever.py index 47a26e9b5f..64ed31f77f 100644 --- a/haystack/nodes/retriever/multimodal/retriever.py +++ b/haystack/nodes/retriever/multimodal/retriever.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch diff --git a/haystack/nodes/sampler/top_p_sampler.py b/haystack/nodes/sampler/top_p_sampler.py index ec299612f9..b77e448760 100644 --- a/haystack/nodes/sampler/top_p_sampler.py +++ b/haystack/nodes/sampler/top_p_sampler.py @@ -11,7 +11,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from sentence_transformers import CrossEncoder from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports diff --git a/haystack/nodes/summarizer/transformers.py b/haystack/nodes/summarizer/transformers.py index b4202d96dc..1bed87d910 100644 --- a/haystack/nodes/summarizer/transformers.py +++ b/haystack/nodes/summarizer/transformers.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import pipeline from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports diff --git a/haystack/nodes/translator/transformers.py b/haystack/nodes/translator/transformers.py index 2fef2eb014..014c5f9f81 100644 --- a/haystack/nodes/translator/transformers.py +++ b/haystack/nodes/translator/transformers.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -with LazyImport() as torch_and_transformers_import: +with LazyImport(message="Run 'pip install farm-haystack[inference]'") as torch_and_transformers_import: import torch from transformers import AutoModelForSeq2SeqLM, AutoTokenizer from haystack.modeling.utils import initialize_device_settings # pylint: disable=ungrouped-imports diff --git a/pyproject.toml b/pyproject.toml index 676800d9dc..52056e140f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ classifiers = [ dependencies = [ "requests", "pydantic", - "transformers[torch,sentencepiece]==4.30.1", + "transformers==4.30.1", "pandas", "rank_bm25", "scikit-learn>=1.0.0", # TF-IDF, SklearnQueryClassifier and metrics @@ -62,16 +62,15 @@ dependencies = [ "quantulum3", # quantities extraction from text "posthog", # telemetry # audio's espnet-model-zoo requires huggingface-hub version <0.8 while we need >=0.5 to be able to use create_repo in FARMReader - "huggingface-hub>=0.5.0", "tenacity", # retry decorator "sseclient-py", # server side events for OpenAI streaming "more_itertools", # utilities # Web Retriever "boilerpy3", - - # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder - "sentence-transformers>=2.2.0", + + # Multimodal Embedder haystack/nodes/retriever/multimodal/embedder.py + "Pillow", # OpenAI tokenizer "tiktoken>=0.3.2", @@ -89,6 +88,11 @@ dependencies = [ ] [project.optional-dependencies] +inference = [ + "transformers[torch,sentencepiece]==4.30.1", + "sentence-transformers>=2.2.0", # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder + "huggingface-hub>=0.5.0", +] elasticsearch = [ "elasticsearch>=7.17,<8", ] @@ -212,11 +216,11 @@ formatting = [ ] all = [ - "farm-haystack[docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics]", + "farm-haystack[inference,docstores,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx,beir,metrics]", ] all-gpu = [ # beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71 - "farm-haystack[docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics]", + "farm-haystack[inference,docstores-gpu,audio,crawler,preprocessing,file-conversion,pdf,ocr,ray,onnx-gpu,metrics]", ] [project.scripts]