new: add gpu support for fastembed, add fastembed providers (#612)
* new: add gpu support for fastembed, add fastembed providers

* new: update fastembed, add fastembed-gpu, add type alias for onnx providers

* new: update readme

* fix: fix poetry.lock
joein committed Jun 20, 2024
1 parent d18cb17 commit b488e17
Showing 5 changed files with 131 additions and 20 deletions.
17 changes: 17 additions & 0 deletions README.md
@@ -107,6 +107,23 @@ search_result = client.query(
print(search_result)
```

FastEmbed can also utilise the GPU for faster embeddings. To enable GPU support, install the `fastembed-gpu` extra:
```bash
pip install 'qdrant-client[fastembed-gpu]'
```

```python
from qdrant_client import QdrantClient

# Initialize the client
client = QdrantClient(":memory:") # or QdrantClient(path="path/to/db")
client.set_model(client.DEFAULT_EMBEDDING_MODEL, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
```

> Note: `fastembed-gpu` and `fastembed` are mutually exclusive. You can only install one of them.
>
> If you previously installed `fastembed`, you might need to start from a fresh environment to install `fastembed-gpu`.

## Connect to Qdrant server

To connect to the Qdrant server, simply specify the host and port:
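Beyond the bare provider names in the README snippet above, the new `providers` argument accepts entries "with or without options" (per the docstrings added below). A minimal sketch, assuming a CUDA-capable machine; the `(name, options)` tuple form and the `device_id` option come from onnxruntime's session API, not from this commit:

```python
from qdrant_client import QdrantClient

client = QdrantClient(":memory:")

# Each entry may be a bare provider name or a (name, options) tuple;
# device_id pins the onnxruntime session to a specific GPU.
client.set_model(
    client.DEFAULT_EMBEDDING_MODEL,
    providers=[
        ("CUDAExecutionProvider", {"device_id": 0}),
        "CPUExecutionProvider",  # fallback when CUDA is unavailable
    ],
)
```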
61 changes: 56 additions & 5 deletions poetry.lock

Some generated files are not rendered by default.

6 changes: 5 additions & 1 deletion pyproject.toml
@@ -27,7 +27,10 @@ grpcio-tools = ">=1.41.0"
urllib3 = ">=1.26.14,<3"
portalocker = "^2.7.0"
fastembed = [
{ version = "0.2.6", optional = true, python = "<3.13" }
{ version = "0.2.7", optional = true, python = "<3.13" }
]
fastembed-gpu = [
{ version = "0.2.7", optional = true, python = "<3.13" }
]

[tool.poetry.group.dev.dependencies]
@@ -58,6 +61,7 @@ types-protobuf = "^4.21.0.5"

[tool.poetry.extras]
fastembed = ["fastembed"]
fastembed-gpu = ["fastembed-gpu"]

[build-system]
requires = ["poetry-core>=1.0.0"]
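The extras above only gate which optional dependency gets installed; at runtime the client detects availability with an import guard, as the diffs below show. A standalone sketch of that same pattern, for illustration only:

```python
# Probe for the optional fastembed dependency the same way the
# mixins below do: import inside try/except and record the result.
try:
    from fastembed import TextEmbedding  # noqa: F401

    FASTEMBED_INSTALLED = True
except ImportError:
    FASTEMBED_INSTALLED = False

print("fastembed available:", FASTEMBED_INSTALLED)
```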
43 changes: 34 additions & 9 deletions qdrant_client/async_qdrant_fastembed.py
@@ -12,7 +12,7 @@
import uuid
import warnings
from itertools import tee
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union

from qdrant_client.async_client_base import AsyncQdrantBase
from qdrant_client.conversions import common_types as types
@@ -21,11 +21,12 @@
from qdrant_client.hybrid.fusion import reciprocal_rank_fusion

try:
from fastembed import TextEmbedding
from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
from fastembed import SparseTextEmbedding, TextEmbedding
from fastembed.common import OnnxProvider
except ImportError:
TextEmbedding = None
SparseTextEmbedding = None
OnnxProvider = None
SUPPORTED_EMBEDDING_MODELS: Dict[str, Tuple[int, models.Distance]] = (
{
model["model"]: (model["dim"], models.Distance.COSINE)
@@ -51,8 +52,7 @@ def __init__(self, **kwargs: Any):
self._embedding_model_name: Optional[str] = None
self._sparse_embedding_model_name: Optional[str] = None
try:
from fastembed import TextEmbedding
from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
from fastembed import SparseTextEmbedding, TextEmbedding

self.__class__._FASTEMBED_INSTALLED = True
except ImportError:
@@ -75,6 +75,7 @@ def set_model(
max_length: Optional[int] = None,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> None:
"""
@@ -86,6 +87,9 @@
Can be set using the `FASTEMBED_CACHE_PATH` env variable.
Defaults to `fastembed_cache` in the system's temp directory.
threads (int, optional): The number of threads a single onnxruntime session can use. Defaults to None.
providers: The list of onnx providers (with or without options) to use. Defaults to None.
Example configuration:
https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
Raises:
ValueError: If embedding model is not supported.
ImportError: If fastembed is not installed.
@@ -100,7 +104,11 @@
stacklevel=2,
)
self._get_or_init_model(
model_name=embedding_model_name, cache_dir=cache_dir, threads=threads, **kwargs
model_name=embedding_model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
self._embedding_model_name = embedding_model_name

@@ -109,6 +117,7 @@ def set_sparse_model(
embedding_model_name: Optional[str],
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
) -> None:
"""
Set sparse embedding model to use for hybrid search over documents in combination with dense embeddings.
@@ -119,6 +128,9 @@
Can be set using the `FASTEMBED_CACHE_PATH` env variable.
Defaults to `fastembed_cache` in the system's temp directory.
threads (int, optional): The number of threads a single onnxruntime session can use. Defaults to None.
providers: The list of onnx providers (with or without options) to use. Defaults to None.
Example configuration:
https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
Raises:
ValueError: If embedding model is not supported.
ImportError: If fastembed is not installed.
@@ -128,7 +140,10 @@
"""
if embedding_model_name is not None:
self._get_or_init_sparse_model(
model_name=embedding_model_name, cache_dir=cache_dir, threads=threads
model_name=embedding_model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
)
self._sparse_embedding_model_name = embedding_model_name

@@ -155,6 +170,7 @@ def _get_or_init_model(
model_name: str,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> "TextEmbedding":
if model_name in cls.embedding_models:
@@ -165,7 +181,11 @@
f"Unsupported embedding model: {model_name}. Supported models: {SUPPORTED_EMBEDDING_MODELS}"
)
cls.embedding_models[model_name] = TextEmbedding(
model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs
model_name=model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
return cls.embedding_models[model_name]

@@ -175,6 +195,7 @@ def _get_or_init_sparse_model(
model_name: str,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> "SparseTextEmbedding":
if model_name in cls.sparse_embedding_models:
@@ -185,7 +206,11 @@
f"Unsupported embedding model: {model_name}. Supported models: {SUPPORTED_SPARSE_EMBEDDING_MODELS}"
)
cls.sparse_embedding_models[model_name] = SparseTextEmbedding(
model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs
model_name=model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
return cls.sparse_embedding_models[model_name]

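The async mixin mirrors the sync API: `set_model` itself stays synchronous, and only the collection operations are awaited. A hedged usage sketch, assuming `AsyncQdrantClient` exposes the same fastembed helpers (`add`, `query`) as the sync client shown in the README:

```python
import asyncio

from qdrant_client import AsyncQdrantClient


async def main() -> None:
    client = AsyncQdrantClient(":memory:")
    # set_model is a regular method even on the async client.
    client.set_model(
        client.DEFAULT_EMBEDDING_MODEL,
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )
    await client.add(collection_name="demo", documents=["fastembed now runs on GPU"])
    print(await client.query(collection_name="demo", query_text="gpu support"))


asyncio.run(main())
```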
24 changes: 19 additions & 5 deletions qdrant_client/qdrant_fastembed.py
@@ -1,7 +1,7 @@
import uuid
import warnings
from itertools import tee
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union

from qdrant_client.client_base import QdrantBase
from qdrant_client.conversions import common_types as types
@@ -10,11 +10,12 @@
from qdrant_client.hybrid.fusion import reciprocal_rank_fusion

try:
from fastembed import TextEmbedding
from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
from fastembed import SparseTextEmbedding, TextEmbedding
from fastembed.common import OnnxProvider
except ImportError:
TextEmbedding = None
SparseTextEmbedding = None
OnnxProvider = None


SUPPORTED_EMBEDDING_MODELS: Dict[str, Tuple[int, models.Distance]] = (
@@ -45,8 +46,7 @@ def __init__(self, **kwargs: Any):
self._embedding_model_name: Optional[str] = None
self._sparse_embedding_model_name: Optional[str] = None
try:
from fastembed import TextEmbedding # noqa: F401
from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
from fastembed import SparseTextEmbedding, TextEmbedding # noqa: F401

self.__class__._FASTEMBED_INSTALLED = True
except ImportError:
@@ -70,6 +70,7 @@ def set_model(
max_length: Optional[int] = None,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> None:
"""
@@ -81,6 +82,9 @@
Can be set using the `FASTEMBED_CACHE_PATH` env variable.
Defaults to `fastembed_cache` in the system's temp directory.
threads (int, optional): The number of threads a single onnxruntime session can use. Defaults to None.
providers: The list of onnx providers (with or without options) to use. Defaults to None.
Example configuration:
https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
Raises:
ValueError: If embedding model is not supported.
ImportError: If fastembed is not installed.
@@ -101,6 +105,7 @@
model_name=embedding_model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
self._embedding_model_name = embedding_model_name
@@ -110,6 +115,7 @@ def set_sparse_model(
embedding_model_name: Optional[str],
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
) -> None:
"""
Set sparse embedding model to use for hybrid search over documents in combination with dense embeddings.
@@ -120,6 +126,9 @@
Can be set using the `FASTEMBED_CACHE_PATH` env variable.
Defaults to `fastembed_cache` in the system's temp directory.
threads (int, optional): The number of threads a single onnxruntime session can use. Defaults to None.
providers: The list of onnx providers (with or without options) to use. Defaults to None.
Example configuration:
https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
Raises:
ValueError: If embedding model is not supported.
ImportError: If fastembed is not installed.
@@ -132,6 +141,7 @@
model_name=embedding_model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
)
self._sparse_embedding_model_name = embedding_model_name

@@ -163,6 +173,7 @@ def _get_or_init_model(
model_name: str,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> "TextEmbedding":
if model_name in cls.embedding_models:
@@ -179,6 +190,7 @@
model_name=model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
return cls.embedding_models[model_name]
@@ -189,6 +201,7 @@
model_name: str,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> "SparseTextEmbedding":
if model_name in cls.sparse_embedding_models:
@@ -205,6 +218,7 @@
model_name=model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
return cls.sparse_embedding_models[model_name]
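`set_sparse_model` gains the same `providers` pass-through, so hybrid search (dense + sparse, fused via the reciprocal rank fusion imported above) can also run on a chosen execution provider. A minimal sketch; the sparse model name is an assumption, not something this commit pins down:

```python
from qdrant_client import QdrantClient

client = QdrantClient(":memory:")

# Both the dense and the sparse model accept the new providers argument.
client.set_model(client.DEFAULT_EMBEDDING_MODEL, providers=["CPUExecutionProvider"])
client.set_sparse_model("prithivida/Splade_PP_en_v1", providers=["CPUExecutionProvider"])

# With both models set, add/query take the hybrid (RRF) path.
client.add(collection_name="hybrid-demo", documents=["qdrant client adds GPU support"])
print(client.query(collection_name="hybrid-demo", query_text="gpu"))
```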
