diff --git a/README.md b/README.md
index cc75d4a6..5fd00bbb 100644
--- a/README.md
+++ b/README.md
@@ -107,6 +107,23 @@ search_result = client.query(
 print(search_result)
 ```
 
+FastEmbed can also utilize the GPU for faster embeddings. To enable GPU support, install:
+```bash
+pip install 'qdrant-client[fastembed-gpu]'
+```
+
+```python
+from qdrant_client import QdrantClient
+
+# Initialize the client
+client = QdrantClient(":memory:") # or QdrantClient(path="path/to/db")
+client.set_model(client.DEFAULT_EMBEDDING_MODEL, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+```
+
+> Note: `fastembed-gpu` and `fastembed` are mutually exclusive. You can only install one of them.
+>
+> If you previously installed `fastembed`, you might need to start from a fresh environment to install `fastembed-gpu`.
+
 ## Connect to Qdrant server
 
 To connect to Qdrant server, simply specify host and port:
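The README example passes bare provider names. The `OnnxProvider` type imported in the client code below also allows `(name, options)` pairs, which onnxruntime sessions accept alongside plain strings. A minimal sketch under that assumption; the `device_id` option is illustrative and comes from the CUDA Execution Provider configuration page linked in the new docstrings:

```python
from qdrant_client import QdrantClient

client = QdrantClient(":memory:")

# Pin the embedding model to the second GPU; "CPUExecutionProvider" stays
# last as a fallback for machines where CUDA is not available.
client.set_model(
    client.DEFAULT_EMBEDDING_MODEL,
    providers=[
        ("CUDAExecutionProvider", {"device_id": 1}),  # (name, options) pair
        "CPUExecutionProvider",
    ],
)
```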
"sha256:b2cd54f2b0a05e6bc9ab30182b859364d30115a19c31be24aa2edef40be00277"}, + {file = "onnxruntime_gpu-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:bdffcced8a5f6275c0df202220e9232138b336f868cd671c9d2c571e834d2a80"}, + {file = "onnxruntime_gpu-1.17.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a1c871e8d0ae4121ea6528fc9410a5a7cbc5e43714b30521d5514fd10b987c83"}, + {file = "onnxruntime_gpu-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:9a0a94eda080e9f4a8e5035fdf0b3c24f5533e7861d88833a94493e63fca0812"}, + {file = "onnxruntime_gpu-1.17.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:624fdb65a632833f13de36854855818680be4f77942d8114524491d58f60d3ab"}, + {file = "onnxruntime_gpu-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:29fa78d232bbb5a5be3a3e0a022148a7b3df2ca66b4c21a11eef56e6f22859e9"}, + {file = "onnxruntime_gpu-1.17.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b0f8c70f2f9aeae825f3a397cc0c5f45124f9ae7c173263cf13c495982b0b99a"}, + {file = "onnxruntime_gpu-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:b1a27a104334461b690e4fc62775e1e71c68936399874932225d7fea21a0c261"}, +] + +[package.dependencies] +coloredlogs = "*" +flatbuffers = "*" +numpy = ">=1.21.6" +packaging = "*" +protobuf = "*" +sympy = "*" + [[package]] name = "packaging" version = "24.0" @@ -3035,8 +3085,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [extras] fastembed = ["fastembed"] +fastembed-gpu = ["fastembed-gpu"] [metadata] lock-version = "2.0" python-versions = ">=3.8" -content-hash = "d9f322012dac6de2c4f537befddade3c059b8f2dcd9420ff742450739da4dc03" +content-hash = "5a438b3f01f44b4b86d95f7c71e9bf3cae3a67316dc27cb657f4539e08570bb3" diff --git a/pyproject.toml b/pyproject.toml index 5686be50..3a634038 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,10 @@ grpcio-tools = ">=1.41.0" urllib3 = ">=1.26.14,<3" portalocker = "^2.7.0" fastembed = [ - { version = "0.2.6", optional = true, python = "<3.13" } + { version = "0.2.7", optional = true, python = "<3.13" } +] +fastembed-gpu = [ + { version = "0.2.7", optional = true, python = "<3.13" } ] [tool.poetry.group.dev.dependencies] @@ -58,6 +61,7 @@ types-protobuf = "^4.21.0.5" [tool.poetry.extras] fastembed = ["fastembed"] +fastembed-gpu = ["fastembed-gpu"] [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/qdrant_client/async_qdrant_fastembed.py b/qdrant_client/async_qdrant_fastembed.py index 888d8e8a..89b48d8b 100644 --- a/qdrant_client/async_qdrant_fastembed.py +++ b/qdrant_client/async_qdrant_fastembed.py @@ -12,7 +12,7 @@ import uuid import warnings from itertools import tee -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union from qdrant_client.async_client_base import AsyncQdrantBase from qdrant_client.conversions import common_types as types @@ -21,11 +21,12 @@ from qdrant_client.hybrid.fusion import reciprocal_rank_fusion try: - from fastembed import TextEmbedding - from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding + from fastembed import SparseTextEmbedding, TextEmbedding + from fastembed.common import OnnxProvider except ImportError: TextEmbedding = None SparseTextEmbedding = None + OnnxProvider = None SUPPORTED_EMBEDDING_MODELS: Dict[str, Tuple[int, models.Distance]] = ( { model["model"]: (model["dim"], models.Distance.COSINE) @@ -51,8 +52,7 @@ def __init__(self, **kwargs: Any): self._embedding_model_name: Optional[str] = None 
diff --git a/qdrant_client/async_qdrant_fastembed.py b/qdrant_client/async_qdrant_fastembed.py
index 888d8e8a..89b48d8b 100644
--- a/qdrant_client/async_qdrant_fastembed.py
+++ b/qdrant_client/async_qdrant_fastembed.py
@@ -12,7 +12,7 @@ import uuid
 import warnings
 from itertools import tee
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
 
 from qdrant_client.async_client_base import AsyncQdrantBase
 from qdrant_client.conversions import common_types as types
@@ -21,11 +21,12 @@ from qdrant_client.hybrid.fusion import reciprocal_rank_fusion
 
 try:
-    from fastembed import TextEmbedding
-    from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
+    from fastembed import SparseTextEmbedding, TextEmbedding
+    from fastembed.common import OnnxProvider
 except ImportError:
     TextEmbedding = None
     SparseTextEmbedding = None
+    OnnxProvider = None
 
 SUPPORTED_EMBEDDING_MODELS: Dict[str, Tuple[int, models.Distance]] = (
     {
         model["model"]: (model["dim"], models.Distance.COSINE)
@@ -51,8 +52,7 @@ def __init__(self, **kwargs: Any):
         self._embedding_model_name: Optional[str] = None
         self._sparse_embedding_model_name: Optional[str] = None
         try:
-            from fastembed import TextEmbedding
-            from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
+            from fastembed import SparseTextEmbedding, TextEmbedding
 
             self.__class__._FASTEMBED_INSTALLED = True
         except ImportError:
@@ -75,6 +75,7 @@ def set_model(
         max_length: Optional[int] = None,
         cache_dir: Optional[str] = None,
         threads: Optional[int] = None,
+        providers: Optional[Sequence["OnnxProvider"]] = None,
         **kwargs: Any,
     ) -> None:
         """
@@ -86,6 +87,9 @@ def set_model(
                 Can be set using the `FASTEMBED_CACHE_PATH` env variable.
                 Defaults to `fastembed_cache` in the system's temp directory.
             threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
+            providers: The list of onnx providers (with or without options) to use. Defaults to None.
+                Example configuration:
+                https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
         Raises:
             ValueError: If embedding model is not supported.
             ImportError: If fastembed is not installed.
@@ -100,7 +104,11 @@ def set_model(
                 stacklevel=2,
             )
         self._get_or_init_model(
-            model_name=embedding_model_name, cache_dir=cache_dir, threads=threads, **kwargs
+            model_name=embedding_model_name,
+            cache_dir=cache_dir,
+            threads=threads,
+            providers=providers,
+            **kwargs,
         )
         self._embedding_model_name = embedding_model_name
@@ -109,6 +117,7 @@ def set_sparse_model(
         embedding_model_name: Optional[str],
         cache_dir: Optional[str] = None,
         threads: Optional[int] = None,
+        providers: Optional[Sequence["OnnxProvider"]] = None,
     ) -> None:
         """
         Set sparse embedding model to use for hybrid search over documents in combination with dense embeddings.
@@ -119,6 +128,9 @@ def set_sparse_model(
                 Can be set using the `FASTEMBED_CACHE_PATH` env variable.
                 Defaults to `fastembed_cache` in the system's temp directory.
             threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
+            providers: The list of onnx providers (with or without options) to use. Defaults to None.
+                Example configuration:
+                https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
         Raises:
             ValueError: If embedding model is not supported.
             ImportError: If fastembed is not installed.
@@ -128,7 +140,10 @@
         """
         if embedding_model_name is not None:
             self._get_or_init_sparse_model(
-                model_name=embedding_model_name, cache_dir=cache_dir, threads=threads
+                model_name=embedding_model_name,
+                cache_dir=cache_dir,
+                threads=threads,
+                providers=providers,
             )
         self._sparse_embedding_model_name = embedding_model_name
@@ -155,6 +170,7 @@ def _get_or_init_model(
         model_name: str,
         cache_dir: Optional[str] = None,
         threads: Optional[int] = None,
+        providers: Optional[Sequence["OnnxProvider"]] = None,
         **kwargs: Any,
     ) -> "TextEmbedding":
         if model_name in cls.embedding_models:
@@ -165,7 +181,11 @@ def _get_or_init_model(
                 f"Unsupported embedding model: {model_name}. Supported models: {SUPPORTED_EMBEDDING_MODELS}"
             )
         cls.embedding_models[model_name] = TextEmbedding(
-            model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs
+            model_name=model_name,
+            cache_dir=cache_dir,
+            threads=threads,
+            providers=providers,
+            **kwargs,
         )
         return cls.embedding_models[model_name]
@@ -175,6 +195,7 @@ def _get_or_init_sparse_model(
         model_name: str,
         cache_dir: Optional[str] = None,
         threads: Optional[int] = None,
+        providers: Optional[Sequence["OnnxProvider"]] = None,
         **kwargs: Any,
     ) -> "SparseTextEmbedding":
         if model_name in cls.sparse_embedding_models:
@@ -185,7 +206,11 @@ def _get_or_init_sparse_model(
                 f"Unsupported embedding model: {model_name}. Supported models: {SUPPORTED_SPARSE_EMBEDDING_MODELS}"
             )
         cls.sparse_embedding_models[model_name] = SparseTextEmbedding(
-            model_name=model_name, cache_dir=cache_dir, threads=threads, **kwargs
+            model_name=model_name,
+            cache_dir=cache_dir,
+            threads=threads,
+            providers=providers,
+            **kwargs,
         )
         return cls.sparse_embedding_models[model_name]
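The async mixin keeps `set_model`/`set_sparse_model` synchronous (they only configure the local ONNX session); only the Qdrant operations themselves are awaited. A usage sketch, assuming the fastembed convenience methods `add` and `query` exposed by `AsyncQdrantClient`:

```python
import asyncio

from qdrant_client import AsyncQdrantClient


async def main() -> None:
    client = AsyncQdrantClient(":memory:")
    # Model configuration is synchronous even on the async client.
    client.set_model(
        client.DEFAULT_EMBEDDING_MODEL,
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )
    await client.add(collection_name="demo", documents=["GPU inference", "CPU fallback"])
    print(await client.query(collection_name="demo", query_text="inference"))


asyncio.run(main())
```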
diff --git a/qdrant_client/qdrant_fastembed.py b/qdrant_client/qdrant_fastembed.py
index 8850f4ac..d45d3378 100644
--- a/qdrant_client/qdrant_fastembed.py
+++ b/qdrant_client/qdrant_fastembed.py
@@ -1,7 +1,7 @@
 import uuid
 import warnings
 from itertools import tee
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
 
 from qdrant_client.client_base import QdrantBase
 from qdrant_client.conversions import common_types as types
@@ -10,11 +10,12 @@ from qdrant_client.hybrid.fusion import reciprocal_rank_fusion
 
 try:
-    from fastembed import TextEmbedding
-    from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
+    from fastembed import SparseTextEmbedding, TextEmbedding
+    from fastembed.common import OnnxProvider
 except ImportError:
     TextEmbedding = None
     SparseTextEmbedding = None
+    OnnxProvider = None
 
 
 SUPPORTED_EMBEDDING_MODELS: Dict[str, Tuple[int, models.Distance]] = (
@@ -45,8 +46,7 @@ def __init__(self, **kwargs: Any):
         self._embedding_model_name: Optional[str] = None
         self._sparse_embedding_model_name: Optional[str] = None
         try:
-            from fastembed import TextEmbedding  # noqa: F401
-            from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
+            from fastembed import SparseTextEmbedding, TextEmbedding  # noqa: F401
 
             self.__class__._FASTEMBED_INSTALLED = True
         except ImportError:
@@ -70,6 +70,7 @@ def set_model(
         max_length: Optional[int] = None,
         cache_dir: Optional[str] = None,
         threads: Optional[int] = None,
+        providers: Optional[Sequence["OnnxProvider"]] = None,
         **kwargs: Any,
     ) -> None:
         """
@@ -81,6 +82,9 @@ def set_model(
                 Can be set using the `FASTEMBED_CACHE_PATH` env variable.
                 Defaults to `fastembed_cache` in the system's temp directory.
             threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
+            providers: The list of onnx providers (with or without options) to use. Defaults to None.
+                Example configuration:
+                https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
         Raises:
             ValueError: If embedding model is not supported.
             ImportError: If fastembed is not installed.
@@ -101,6 +105,7 @@ def set_model(
             model_name=embedding_model_name,
             cache_dir=cache_dir,
             threads=threads,
+            providers=providers,
             **kwargs,
         )
         self._embedding_model_name = embedding_model_name
@@ -110,6 +115,7 @@ def set_sparse_model(
         embedding_model_name: Optional[str],
         cache_dir: Optional[str] = None,
         threads: Optional[int] = None,
+        providers: Optional[Sequence["OnnxProvider"]] = None,
     ) -> None:
         """
         Set sparse embedding model to use for hybrid search over documents in combination with dense embeddings.
@@ -120,6 +126,9 @@ def set_sparse_model(
                 Can be set using the `FASTEMBED_CACHE_PATH` env variable.
                 Defaults to `fastembed_cache` in the system's temp directory.
             threads (int, optional): The number of threads single onnxruntime session can use. Defaults to None.
+            providers: The list of onnx providers (with or without options) to use. Defaults to None.
+                Example configuration:
+                https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#configuration-options
         Raises:
             ValueError: If embedding model is not supported.
             ImportError: If fastembed is not installed.
@@ -132,6 +141,7 @@ def set_sparse_model(
                 model_name=embedding_model_name,
                 cache_dir=cache_dir,
                 threads=threads,
+                providers=providers,
             )
         self._sparse_embedding_model_name = embedding_model_name
@@ -163,6 +173,7 @@ def _get_or_init_model(
         model_name: str,
         cache_dir: Optional[str] = None,
         threads: Optional[int] = None,
+        providers: Optional[Sequence["OnnxProvider"]] = None,
         **kwargs: Any,
     ) -> "TextEmbedding":
         if model_name in cls.embedding_models:
@@ -179,6 +190,7 @@ def _get_or_init_model(
             model_name=model_name,
             cache_dir=cache_dir,
             threads=threads,
+            providers=providers,
             **kwargs,
         )
         return cls.embedding_models[model_name]
@@ -189,6 +201,7 @@ def _get_or_init_sparse_model(
         model_name: str,
         cache_dir: Optional[str] = None,
         threads: Optional[int] = None,
+        providers: Optional[Sequence["OnnxProvider"]] = None,
         **kwargs: Any,
     ) -> "SparseTextEmbedding":
         if model_name in cls.sparse_embedding_models:
@@ -205,6 +218,7 @@ def _get_or_init_sparse_model(
             model_name=model_name,
             cache_dir=cache_dir,
             threads=threads,
+            providers=providers,
             **kwargs,
        )
        return cls.sparse_embedding_models[model_name]
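Since `set_sparse_model` gained the same `providers` parameter, a hybrid dense-plus-sparse setup can run both ONNX sessions on the GPU. A sketch; the sparse model name here is only an example and has to be one of `SUPPORTED_SPARSE_EMBEDDING_MODELS`:

```python
from qdrant_client import QdrantClient

client = QdrantClient(":memory:")

gpu_providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
# Dense and sparse models each get their own onnxruntime session, so the
# providers sequence is passed to both.
client.set_model(client.DEFAULT_EMBEDDING_MODEL, providers=gpu_providers)
client.set_sparse_model("prithivida/Splade_PP_en_v1", providers=gpu_providers)

# With both models set, add() stores dense and sparse vectors, and query()
# fuses the two result lists (reciprocal rank fusion, per the import above).
client.add(collection_name="hybrid-demo", documents=["fast retrieval", "hybrid search"])
print(client.query(collection_name="hybrid-demo", query_text="retrieval"))
```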