new: add gpu support for fastembed, add fastembed providers (#612)
* new: add gpu support for fastembed, add fastembed providers

* new: update fastembed, add fastembed-gpu, add type alias for onnx providers

* new: update readme

* fix: fix poetry.lock
joein committed Jun 20, 2024
1 parent d18cb17 commit b488e17
Showing 5 changed files with 131 additions and 20 deletions.
17 changes: 17 additions & 0 deletions README.md
@@ -107,6 +107,23 @@ search_result = client.query(
print(search_result)
```

FastEmbed can also utilise the GPU for faster embeddings. To enable GPU support, install the `fastembed-gpu` extra:
```bash
pip install 'qdrant-client[fastembed-gpu]'
```

```python
from qdrant_client import QdrantClient

# Initialize the client
client = QdrantClient(":memory:") # or QdrantClient(path="path/to/db")
client.set_model(client.DEFAULT_EMBEDDING_MODEL, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
```

> Note: `fastembed-gpu` and `fastembed` are mutually exclusive. You can only install one of them.
>
> If you previously installed `fastembed`, you might need to start from a fresh environment to install `fastembed-gpu`.

## Connect to Qdrant server

To connect to the Qdrant server, simply specify the host and port:
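Beyond the bare provider names in the README snippet above, the new `providers` argument accepts entries "with or without options" (per the docstrings added below). A minimal sketch, assuming a CUDA-capable machine; the `(name, options)` tuple form and the `device_id` option come from onnxruntime's session API, not from this commit:

```python
from qdrant_client import QdrantClient

client = QdrantClient(":memory:")

# Each entry may be a bare provider name or a (name, options) tuple;
# device_id pins the onnxruntime session to a specific GPU.
client.set_model(
    client.DEFAULT_EMBEDDING_MODEL,
    providers=[
        ("CUDAExecutionProvider", {"device_id": 0}),
        "CPUExecutionProvider",  # fallback when CUDA is unavailable
    ],
)
```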
61 changes: 56 additions & 5 deletions poetry.lock

Some generated files are not rendered by default.

6 changes: 5 additions & 1 deletion pyproject.toml
@@ -27,7 +27,10 @@ grpcio-tools = ">=1.41.0"
urllib3 = ">=1.26.14,<3"
portalocker = "^2.7.0"
fastembed = [
{ version = "0.2.6", optional = true, python = "<3.13" }
{ version = "0.2.7", optional = true, python = "<3.13" }
]
fastembed-gpu = [
{ version = "0.2.7", optional = true, python = "<3.13" }
]

[tool.poetry.group.dev.dependencies]
@@ -58,6 +61,7 @@ types-protobuf = "^4.21.0.5"

[tool.poetry.extras]
fastembed = ["fastembed"]
fastembed-gpu = ["fastembed-gpu"]

[build-system]
requires = ["poetry-core>=1.0.0"]
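The extras above only gate which optional dependency gets installed; at runtime the client detects availability with an import guard, as the diffs below show. A standalone sketch of that same pattern, for illustration only:

```python
# Probe for the optional fastembed dependency the same way the
# mixins below do: import inside try/except and record the result.
try:
    from fastembed import TextEmbedding  # noqa: F401

    FASTEMBED_INSTALLED = True
except ImportError:
    FASTEMBED_INSTALLED = False

print("fastembed available:", FASTEMBED_INSTALLED)
```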
43 changes: 34 additions & 9 deletions qdrant_client/async_qdrant_fastembed.py
@@ -12,7 +12,7 @@
import uuid
import warnings
from itertools import tee
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union

from qdrant_client.async_client_base import AsyncQdrantBase
from qdrant_client.conversions import common_types as types
@@ -21,11 +21,12 @@
from qdrant_client.hybrid.fusion import reciprocal_rank_fusion

try:
from fastembed import TextEmbedding
from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
from fastembed import SparseTextEmbedding, TextEmbedding
from fastembed.common import OnnxProvider
except ImportError:
TextEmbedding = None
SparseTextEmbedding = None
OnnxProvider = None
SUPPORTED_EMBEDDING_MODELS: Dict[str, Tuple[int, models.Distance]] = (
{
model["model"]: (model["dim"], models.Distance.COSINE)
@@ -51,8 +52,7 @@ def __init__(self, **kwargs: Any):
self._embedding_model_name: Optional[str] = None
self._sparse_embedding_model_name: Optional[str] = None
try:
from fastembed import TextEmbedding
from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
from fastembed import SparseTextEmbedding, TextEmbedding

self.__class__._FASTEMBED_INSTALLED = True
except ImportError:
@@ -75,6 +75,7 @@ def set_model(
max_length: Optional[int] = None,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> None:
"""
@@ -86,6 +87,9 @@
Can be set using the `FASTEMBED_CACHE_PATH` env variable.
Defaults to `fastembed_cache` in the system's temp directory.
threads (int, optional): The number of threads a single onnxruntime session can use. Defaults to None.
providers: The list of onnx providers (with or without options) to use. Defaults to None.
Example configuration:
https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
Raises:
ValueError: If embedding model is not supported.
ImportError: If fastembed is not installed.
@@ -100,7 +104,11 @@
stacklevel=2,
)
self._get_or_init_model(
model_name=embedding_model_name, cache_dir=cache_dir, threads=threads, **kwargs
model_name=embedding_model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
self._embedding_model_name = embedding_model_name

@@ -109,6 +117,7 @@ def set_sparse_model(
embedding_model_name: Optional[str],
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
) -> None:
"""
Set sparse embedding model to use for hybrid search over documents in combination with dense embeddings.
@@ -119,6 +128,9 @@
Can be set using the `FASTEMBED_CACHE_PATH` env variable.
Defaults to `fastembed_cache` in the system's temp directory.
threads (int, optional): The number of threads a single onnxruntime session can use. Defaults to None.
providers: The list of onnx providers (with or without options) to use. Defaults to None.
Example configuration:
https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
Raises:
ValueError: If embedding model is not supported.
ImportError: If fastembed is not installed.
@@ -128,7 +140,10 @@
"""
if embedding_model_name is not None:
self._get_or_init_sparse_model(
model_name=embedding_model_name, cache_dir=cache_dir, threads=threads
model_name=embedding_model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
)
self._sparse_embedding_model_name = embedding_model_name

@@ -155,6 +170,7 @@ def _get_or_init_model(
model_name: str,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> "TextEmbedding":
if model_name in cls.embedding_models:
@@ -165,7 +181,11 @@
f"Unsupported embedding model: {model_name}. Supported models: {SUPPORTED_EMBEDDING_MODELS}"
)
cls.embedding_models[model_name] = TextEmbedding(
model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs
model_name=model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
return cls.embedding_models[model_name]

@@ -175,6 +195,7 @@ def _get_or_init_sparse_model(
model_name: str,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> "SparseTextEmbedding":
if model_name in cls.sparse_embedding_models:
@@ -185,7 +206,11 @@
f"Unsupported embedding model: {model_name}. Supported models: {SUPPORTED_SPARSE_EMBEDDING_MODELS}"
)
cls.sparse_embedding_models[model_name] = SparseTextEmbedding(
model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs
model_name=model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
return cls.sparse_embedding_models[model_name]

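The async mixin mirrors the sync API: `set_model` itself stays synchronous, and only the collection operations are awaited. A hedged usage sketch, assuming `AsyncQdrantClient` exposes the same fastembed helpers (`add`, `query`) as the sync client shown in the README:

```python
import asyncio

from qdrant_client import AsyncQdrantClient


async def main() -> None:
    client = AsyncQdrantClient(":memory:")
    # set_model is a regular method even on the async client.
    client.set_model(
        client.DEFAULT_EMBEDDING_MODEL,
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )
    await client.add(collection_name="demo", documents=["fastembed now runs on GPU"])
    print(await client.query(collection_name="demo", query_text="gpu support"))


asyncio.run(main())
```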
24 changes: 19 additions & 5 deletions qdrant_client/qdrant_fastembed.py
@@ -1,7 +1,7 @@
import uuid
import warnings
from itertools import tee
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union

from qdrant_client.client_base import QdrantBase
from qdrant_client.conversions import common_types as types
@@ -10,11 +10,12 @@
from qdrant_client.hybrid.fusion import reciprocal_rank_fusion

try:
from fastembed import TextEmbedding
from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
from fastembed import SparseTextEmbedding, TextEmbedding
from fastembed.common import OnnxProvider
except ImportError:
TextEmbedding = None
SparseTextEmbedding = None
OnnxProvider = None


SUPPORTED_EMBEDDING_MODELS: Dict[str, Tuple[int, models.Distance]] = (
@@ -45,8 +46,7 @@ def __init__(self, **kwargs: Any):
self._embedding_model_name: Optional[str] = None
self._sparse_embedding_model_name: Optional[str] = None
try:
from fastembed import TextEmbedding # noqa: F401
from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
from fastembed import SparseTextEmbedding, TextEmbedding # noqa: F401

self.__class__._FASTEMBED_INSTALLED = True
except ImportError:
@@ -70,6 +70,7 @@ def set_model(
max_length: Optional[int] = None,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> None:
"""
@@ -81,6 +82,9 @@
Can be set using the `FASTEMBED_CACHE_PATH` env variable.
Defaults to `fastembed_cache` in the system's temp directory.
threads (int, optional): The number of threads a single onnxruntime session can use. Defaults to None.
providers: The list of onnx providers (with or without options) to use. Defaults to None.
Example configuration:
https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
Raises:
ValueError: If embedding model is not supported.
ImportError: If fastembed is not installed.
@@ -101,6 +105,7 @@
model_name=embedding_model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
self._embedding_model_name = embedding_model_name
@@ -110,6 +115,7 @@ def set_sparse_model(
embedding_model_name: Optional[str],
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
) -> None:
"""
Set sparse embedding model to use for hybrid search over documents in combination with dense embeddings.
@@ -120,6 +126,9 @@
Can be set using the `FASTEMBED_CACHE_PATH` env variable.
Defaults to `fastembed_cache` in the system's temp directory.
threads (int, optional): The number of threads a single onnxruntime session can use. Defaults to None.
providers: The list of onnx providers (with or without options) to use. Defaults to None.
Example configuration:
https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
Raises:
ValueError: If embedding model is not supported.
ImportError: If fastembed is not installed.
@@ -132,6 +141,7 @@
model_name=embedding_model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
)
self._sparse_embedding_model_name = embedding_model_name

@@ -163,6 +173,7 @@ def _get_or_init_model(
model_name: str,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> "TextEmbedding":
if model_name in cls.embedding_models:
@@ -179,6 +190,7 @@
model_name=model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
return cls.embedding_models[model_name]
@@ -189,6 +201,7 @@
model_name: str,
cache_dir: Optional[str] = None,
threads: Optional[int] = None,
providers: Optional[Sequence["OnnxProvider"]] = None,
**kwargs: Any,
) -> "SparseTextEmbedding":
if model_name in cls.sparse_embedding_models:
@@ -205,6 +218,7 @@
model_name=model_name,
cache_dir=cache_dir,
threads=threads,
providers=providers,
**kwargs,
)
return cls.sparse_embedding_models[model_name]
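`set_sparse_model` gains the same `providers` pass-through, so hybrid search (dense + sparse, fused via the reciprocal rank fusion imported above) can also run on a chosen execution provider. A minimal sketch; the sparse model name is an assumption, not something this commit pins down:

```python
from qdrant_client import QdrantClient

client = QdrantClient(":memory:")

# Both the dense and the sparse model accept the new providers argument.
client.set_model(client.DEFAULT_EMBEDDING_MODEL, providers=["CPUExecutionProvider"])
client.set_sparse_model("prithivida/Splade_PP_en_v1", providers=["CPUExecutionProvider"])

# With both models set, add/query take the hybrid (RRF) path.
client.add(collection_name="hybrid-demo", documents=["qdrant client adds GPU support"])
print(client.query(collection_name="hybrid-demo", query_text="gpu"))
```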
