From f65070087fcc0c1f7e6add9f9df545ca9f66b7b1 Mon Sep 17 00:00:00 2001 From: Ryan Lin Date: Tue, 3 Dec 2024 03:40:28 -0500 Subject: [PATCH 1/4] Feature: Integrate Milvus as the VectorDatabase --- .../databases/vector/create_vector_engine.py | 43 ++- .../databases/vector/milvus/MilvusAdapter.py | 245 ++++++++++++++++++ .../databases/vector/milvus/__init__.py | 1 + cognee/tests/test_milvus.py | 76 ++++++ poetry.lock | 138 +++++++++- pyproject.toml | 1 + 6 files changed, 486 insertions(+), 18 deletions(-) create mode 100644 cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py create mode 100644 cognee/infrastructure/databases/vector/milvus/__init__.py create mode 100644 cognee/tests/test_milvus.py diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 4b4799ee..5dda755f 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -1,11 +1,13 @@ from typing import Dict + class VectorConfig(Dict): vector_db_url: str vector_db_port: str vector_db_key: str vector_db_provider: str + def create_vector_engine(config: VectorConfig, embedding_engine): if config["vector_db_provider"] == "weaviate": from .weaviate_db import WeaviateAdapter @@ -16,24 +18,37 @@ def create_vector_engine(config: VectorConfig, embedding_engine): return WeaviateAdapter( config["vector_db_url"], config["vector_db_key"], - embedding_engine = embedding_engine + embedding_engine=embedding_engine ) elif config["vector_db_provider"] == "qdrant": if not (config["vector_db_url"] and config["vector_db_key"]): raise EnvironmentError("Missing requred Qdrant credentials!") - + from .qdrant.QDrantAdapter import QDrantAdapter return QDrantAdapter( - url = config["vector_db_url"], - api_key = config["vector_db_key"], - embedding_engine = embedding_engine + url=config["vector_db_url"], + api_key=config["vector_db_key"], + embedding_engine=embedding_engine + ) + + elif config['vector_db_provider'] == 'milvus': + from .milvus.MilvusAdapter import MilvusAdapter + + if not config["vector_db_url"]: + raise EnvironmentError("Missing required Milvus credentials!") + + return MilvusAdapter( + url=config["vector_db_url"], + api_key=config['vector_db_key'], + embedding_engine=embedding_engine ) + elif config["vector_db_provider"] == "pgvector": from cognee.infrastructure.databases.relational import get_relational_config - + # Get configuration for postgres database relational_config = get_relational_config() db_username = relational_config.db_username @@ -52,8 +67,8 @@ def create_vector_engine(config: VectorConfig, embedding_engine): from .pgvector.PGVectorAdapter import PGVectorAdapter return PGVectorAdapter( - connection_string, - config["vector_db_key"], + connection_string, + config["vector_db_key"], embedding_engine, ) @@ -64,16 +79,16 @@ def create_vector_engine(config: VectorConfig, embedding_engine): from ..hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter return FalkorDBAdapter( - database_url = config["vector_db_url"], - database_port = config["vector_db_port"], - embedding_engine = embedding_engine, + database_url=config["vector_db_url"], + database_port=config["vector_db_port"], + embedding_engine=embedding_engine, ) else: from .lancedb.LanceDBAdapter import LanceDBAdapter return LanceDBAdapter( - url = config["vector_db_url"], - api_key = config["vector_db_key"], - embedding_engine = embedding_engine, + url=config["vector_db_url"], + 
api_key=config["vector_db_key"], + embedding_engine=embedding_engine, ) diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py new file mode 100644 index 00000000..bfc0bbd1 --- /dev/null +++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py @@ -0,0 +1,245 @@ +import asyncio +import logging +from typing import List, Optional +from uuid import UUID +from cognee.infrastructure.engine import DataPoint +from ..vector_db_interface import VectorDBInterface +from ..models.ScoredResult import ScoredResult +from ..embeddings.EmbeddingEngine import EmbeddingEngine +from pymilvus import MilvusClient + +logger = logging.getLogger("MilvusAdapter") + + +class IndexSchema(DataPoint): + text: str + + _metadata: dict = { + "index_fields": ["text"] + } + + +class MilvusAdapter(VectorDBInterface): + name = "Milvus" + url: str + api_key: Optional[str] + embedding_engine: EmbeddingEngine = None + + def __init__(self, url: str, api_key: Optional[str], embedding_engine: EmbeddingEngine): + self.url = url + self.api_key = api_key + + self.embedding_engine = embedding_engine + + def get_milvus_client(self) -> MilvusClient: + if self.api_key is not None: + client = MilvusClient(uri=self.url, token=self.api_key) + else: + client = MilvusClient(uri=self.url) + return client + + async def embed_data(self, data: List[str]) -> list[list[float]]: + return await self.embedding_engine.embed_text(data) + + async def has_collection(self, collection_name: str) -> bool: + future = asyncio.Future() + client = self.get_milvus_client() + future.set_result(client.has_collection(collection_name=collection_name)) + + return await future + + async def create_collection( + self, + collection_name: str, + payload_schema=None, + ): + from pymilvus import DataType, MilvusException + client = self.get_milvus_client() + if client.has_collection(collection_name=collection_name): + logger.info(f"Collection '{collection_name}' already exists.") + return True + + try: + dimension = self.embedding_engine.get_vector_size() + assert dimension > 0, "Embedding dimension must be greater than 0." 
+ + schema = client.create_schema( + auto_id=False, + enable_dynamic_field=False, + ) + + schema.add_field( + field_name="id", + datatype=DataType.VARCHAR, + is_primary=True, + max_length=36 + ) + + schema.add_field( + field_name="vector", + datatype=DataType.FLOAT_VECTOR, + dim=dimension + ) + + schema.add_field( + field_name="text", + datatype=DataType.VARCHAR, + max_length=60535 + ) + + index_params = client.prepare_index_params() + index_params.add_index( + field_name="vector", + metric_type="COSINE" + ) + + client.create_collection( + collection_name=collection_name, + schema=schema, + index_params=index_params + ) + + client.load_collection(collection_name) + + logger.info(f"Collection '{collection_name}' created successfully.") + return True + except MilvusException as e: + logger.error(f"Error creating collection '{collection_name}': {str(e)}") + raise e + + async def create_data_points( + self, + collection_name: str, + data_points: List[DataPoint] + ): + from pymilvus import MilvusException + client = self.get_milvus_client() + data_vectors = await self.embed_data( + [data_point.get_embeddable_data() for data_point in data_points] + ) + + insert_data = [ + { + "id": str(data_point.id), + "vector": data_vectors[index], + "text": data_point.text, + } + for index, data_point in enumerate(data_points) + ] + + try: + result = client.insert( + collection_name=collection_name, + data=insert_data + ) + logger.info( + f"Inserted {result.get('insert_count', 0)} data points into collection '{collection_name}'." + ) + return result + except MilvusException as e: + logger.error(f"Error inserting data points into collection '{collection_name}': {str(e)}") + raise e + + async def create_vector_index(self, index_name: str, index_property_name: str): + await self.create_collection(f"{index_name}_{index_property_name}") + + async def index_data_points(self, index_name: str, index_property_name: str, data_points: List[DataPoint]): + formatted_data_points = [ + IndexSchema( + id=data_point.id, + text=getattr(data_point, data_point._metadata["index_fields"][0]), + ) + for data_point in data_points + ] + collection_name = f"{index_name}_{index_property_name}" + await self.create_data_points(collection_name, formatted_data_points) + + async def retrieve(self, collection_name: str, data_point_ids: list[str]): + from pymilvus import MilvusException + client = self.get_milvus_client() + try: + filter_expression = f"""id in [{", ".join(f'"{id}"' for id in data_point_ids)}]""" + + results = client.query( + collection_name=collection_name, + expr=filter_expression, + output_fields=["*"], + ) + return results + except MilvusException as e: + logger.error(f"Error retrieving data points from collection '{collection_name}': {str(e)}") + raise e + + async def search( + self, + collection_name: str, + query_text: Optional[str] = None, + query_vector: Optional[List[float]] = None, + limit: int = 5, + with_vector: bool = False, + ): + from pymilvus import MilvusException + client = self.get_milvus_client() + if query_text is None and query_vector is None: + raise ValueError("One of query_text or query_vector must be provided!") + + try: + query_vector = query_vector or (await self.embed_data([query_text]))[0] + + output_fields = ["id", "text"] + if with_vector: + output_fields.append("vector") + + results = client.search( + collection_name=collection_name, + data=[query_vector], + anns_field="vector", + limit=limit, + output_fields=output_fields, + search_params={ + "metric_type": "COSINE", + }, + ) + + return [ + 
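+                # client.search returns one hit list per query vector; results[0] is
+                # the list for our single vector, each hit carrying id/distance/entity.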
ScoredResult( + id=UUID(result["id"]), + score=result["distance"], + payload=result.get("entity", {}), + ) + for result in results[0] + ] + except MilvusException as e: + logger.error(f"Error during search in collection '{collection_name}': {str(e)}") + raise e + + async def batch_search(self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False): + def query_search(query_vector): + return self.search(collection_name, query_vector=query_vector, limit=limit, with_vector=with_vectors) + + return [await query_search(query_vector) for query_vector in await self.embed_data(query_texts)] + + async def delete_data_points(self, collection_name: str, data_point_ids: list[str]): + from pymilvus import MilvusException + client = self.get_milvus_client() + try: + filter_expression = f"""id in [{", ".join(f'"{id}"' for id in data_point_ids)}]""" + + delete_result = client.delete( + collection_name=collection_name, + filter=filter_expression + ) + + logger.info(f"Deleted data points with IDs {data_point_ids} from collection '{collection_name}'.") + return delete_result + except MilvusException as e: + logger.error(f"Error deleting data points from collection '{collection_name}': {str(e)}") + raise e + + async def prune(self): + client = self.get_milvus_client() + if client: + collections = client.list_collections() + for collection_name in collections: + client.drop_collection(collection_name=collection_name) + client.close() diff --git a/cognee/infrastructure/databases/vector/milvus/__init__.py b/cognee/infrastructure/databases/vector/milvus/__init__.py new file mode 100644 index 00000000..ecb3cb14 --- /dev/null +++ b/cognee/infrastructure/databases/vector/milvus/__init__.py @@ -0,0 +1 @@ +from .MilvusAdapter import MilvusAdapter diff --git a/cognee/tests/test_milvus.py b/cognee/tests/test_milvus.py new file mode 100644 index 00000000..d565f644 --- /dev/null +++ b/cognee/tests/test_milvus.py @@ -0,0 +1,76 @@ +import os +import logging +import pathlib +import cognee +from cognee.api.v1.search import SearchType + +logging.basicConfig(level=logging.DEBUG) + + +async def main(): + cognee.config.set_vector_db_provider("milvus") + data_directory_path = str( + pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_milvus")).resolve()) + cognee.config.data_root_directory(data_directory_path) + cognee_directory_path = str( + pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_milvus")).resolve()) + cognee.config.system_root_directory(cognee_directory_path) + + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + dataset_name = "cs_explanations" + + explanation_file_path = os.path.join(pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt") + await cognee.add([explanation_file_path], dataset_name) + + text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. + At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. + Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. 
In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. + The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. + Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. + In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. + """ + + await cognee.add([text], dataset_name) + + await cognee.cognify([dataset_name]) + + from cognee.infrastructure.databases.vector import get_vector_engine + vector_engine = get_vector_engine() + random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0] + random_node_name = random_node.payload["text"] + + search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name) + assert len(search_results) != 0, "The search results list is empty." + print("\n\nExtracted INSIGHTS are:\n") + for result in search_results: + print(f"{result}\n") + + search_results = await cognee.search(SearchType.CHUNKS, query_text=random_node_name) + assert len(search_results) != 0, "The search results list is empty." + print("\n\nExtracted CHUNKS are:\n") + for result in search_results: + print(f"{result}\n") + + search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name) + assert len(search_results) != 0, "The search results list is empty." 
+ print("\nExtracted SUMMARIES are:\n") + for result in search_results: + print(f"{result}\n") + + history = await cognee.get_search_history() + assert len(history) == 6, "Search history is not correct." + + await cognee.prune.prune_data() + assert not os.path.isdir(data_directory_path), "Local data files are not deleted" + + await cognee.prune.prune_system(metadata=True) + milvus_client = get_vector_engine().get_milvus_client() + collections = milvus_client.list_collections() + assert len(collections) == 0, "Milvus vector database is not empty" + + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) diff --git a/poetry.lock b/poetry.lock index 7d09c340..4b826264 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. [[package]] name = "aiofiles" @@ -2041,7 +2041,7 @@ typing-extensions = ">=4.7,<5" name = "grpcio" version = "1.67.1" description = "HTTP/2-based RPC framework" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, @@ -2751,6 +2751,8 @@ optional = false python-versions = "*" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, + {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, + {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -3602,6 +3604,22 @@ files = [ {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, ] +[[package]] +name = "milvus-lite" +version = "2.4.10" +description = "A lightweight version of Milvus wrapped with Python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "milvus_lite-2.4.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fc4246d3ed7d1910847afce0c9ba18212e93a6e9b8406048436940578dfad5cb"}, + {file = "milvus_lite-2.4.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:74a8e07c5e3b057df17fbb46913388e84df1dc403a200f4e423799a58184c800"}, + {file = "milvus_lite-2.4.10-py3-none-manylinux2014_aarch64.whl", hash = "sha256:240c7386b747bad696ecb5bd1f58d491e86b9d4b92dccee3315ed7256256eddc"}, + {file = "milvus_lite-2.4.10-py3-none-manylinux2014_x86_64.whl", hash = "sha256:211d2e334a043f9282bdd9755f76b9b2d93b23bffa7af240919ffce6a8dfe325"}, +] + +[package.dependencies] +tqdm = "*" + [[package]] name = "mistune" version = "3.0.2" @@ -4938,7 +4956,7 @@ files = [ name = "protobuf" version = "5.28.3" description = "" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24"}, @@ -5360,6 +5378,31 @@ pyyaml = "*" [package.extras] extra = ["pygments (>=2.12)"] +[[package]] +name = "pymilvus" +version = "2.5.0" +description = "Python Sdk for Milvus" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pymilvus-2.5.0-py3-none-any.whl", hash = "sha256:a0e8653d8fe78019abfda79b3404ef7423f312501e8cbd7dc728051ce8732652"}, + {file = "pymilvus-2.5.0.tar.gz", hash = "sha256:4da14a3bd957a4921166f9355fd1f1ac5c5e4e80b46f12f64d9c9a6dcb8cb395"}, +] + +[package.dependencies] +grpcio = ">=1.49.1,<=1.67.1" +milvus-lite = {version = ">=2.4.0", markers = "sys_platform != \"win32\""} +pandas = ">=1.2.4" +protobuf = ">=3.20.0" +python-dotenv = ">=1.0.1,<2.0.0" +setuptools = ">69" +ujson = ">=2.0.0" + +[package.extras] +bulk-writer = ["azure-storage-blob", "minio (>=7.0.0)", "pyarrow (>=12.0.0)", "requests"] +dev = ["black", "grpcio (==1.62.2)", "grpcio-testing (==1.62.2)", "grpcio-tools (==1.62.2)", "pytest (>=5.3.4)", "pytest-cov (>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>0.4.0)"] +model = ["milvus-model (>=0.1.0)"] + [[package]] name = "pyparsing" version = "3.2.0" @@ -7075,6 +7118,93 @@ files = [ {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, ] +[[package]] +name = "ujson" +version = "5.10.0" +description = "Ultra fast JSON encoder and decoder for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"}, + {file = "ujson-5.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:348898dd702fc1c4f1051bc3aacbf894caa0927fe2c53e68679c073375f732cf"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22cffecf73391e8abd65ef5f4e4dd523162a3399d5e84faa6aebbf9583df86d6"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26b0e2d2366543c1bb4fbd457446f00b0187a2bddf93148ac2da07a53fe51569"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:caf270c6dba1be7a41125cd1e4fc7ba384bf564650beef0df2dd21a00b7f5770"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a245d59f2ffe750446292b0094244df163c3dc96b3ce152a2c837a44e7cda9d1"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = 
"sha256:94a87f6e151c5f483d7d54ceef83b45d3a9cca7a9cb453dbdbb3f5a6f64033f5"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:29b443c4c0a113bcbb792c88bea67b675c7ca3ca80c3474784e08bba01c18d51"}, + {file = "ujson-5.10.0-cp310-cp310-win32.whl", hash = "sha256:c18610b9ccd2874950faf474692deee4223a994251bc0a083c114671b64e6518"}, + {file = "ujson-5.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:924f7318c31874d6bb44d9ee1900167ca32aa9b69389b98ecbde34c1698a250f"}, + {file = "ujson-5.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a5b366812c90e69d0f379a53648be10a5db38f9d4ad212b60af00bd4048d0f00"}, + {file = "ujson-5.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:502bf475781e8167f0f9d0e41cd32879d120a524b22358e7f205294224c71126"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b91b5d0d9d283e085e821651184a647699430705b15bf274c7896f23fe9c9d8"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:129e39af3a6d85b9c26d5577169c21d53821d8cf68e079060602e861c6e5da1b"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f77b74475c462cb8b88680471193064d3e715c7c6074b1c8c412cb526466efe9"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7ec0ca8c415e81aa4123501fee7f761abf4b7f386aad348501a26940beb1860f"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab13a2a9e0b2865a6c6db9271f4b46af1c7476bfd51af1f64585e919b7c07fd4"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:57aaf98b92d72fc70886b5a0e1a1ca52c2320377360341715dd3933a18e827b1"}, + {file = "ujson-5.10.0-cp311-cp311-win32.whl", hash = "sha256:2987713a490ceb27edff77fb184ed09acdc565db700ee852823c3dc3cffe455f"}, + {file = "ujson-5.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:f00ea7e00447918ee0eff2422c4add4c5752b1b60e88fcb3c067d4a21049a720"}, + {file = "ujson-5.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98ba15d8cbc481ce55695beee9f063189dce91a4b08bc1d03e7f0152cd4bbdd5"}, + {file = "ujson-5.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a9d2edbf1556e4f56e50fab7d8ff993dbad7f54bac68eacdd27a8f55f433578e"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6627029ae4f52d0e1a2451768c2c37c0c814ffc04f796eb36244cf16b8e57043"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ccb77b3e40b151e20519c6ae6d89bfe3f4c14e8e210d910287f778368bb3d1"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3caf9cd64abfeb11a3b661329085c5e167abbe15256b3b68cb5d914ba7396f3"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6e32abdce572e3a8c3d02c886c704a38a1b015a1fb858004e03d20ca7cecbb21"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a65b6af4d903103ee7b6f4f5b85f1bfd0c90ba4eeac6421aae436c9988aa64a2"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:604a046d966457b6cdcacc5aa2ec5314f0e8c42bae52842c1e6fa02ea4bda42e"}, + {file = "ujson-5.10.0-cp312-cp312-win32.whl", hash = "sha256:6dea1c8b4fc921bf78a8ff00bbd2bfe166345f5536c510671bccececb187c80e"}, + {file = "ujson-5.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:38665e7d8290188b1e0d57d584eb8110951a9591363316dd41cf8686ab1d0abc"}, + {file = 
"ujson-5.10.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:618efd84dc1acbd6bff8eaa736bb6c074bfa8b8a98f55b61c38d4ca2c1f7f287"}, + {file = "ujson-5.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38d5d36b4aedfe81dfe251f76c0467399d575d1395a1755de391e58985ab1c2e"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67079b1f9fb29ed9a2914acf4ef6c02844b3153913eb735d4bf287ee1db6e557"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7d0e0ceeb8fe2468c70ec0c37b439dd554e2aa539a8a56365fd761edb418988"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:59e02cd37bc7c44d587a0ba45347cc815fb7a5fe48de16bf05caa5f7d0d2e816"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a890b706b64e0065f02577bf6d8ca3b66c11a5e81fb75d757233a38c07a1f20"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:621e34b4632c740ecb491efc7f1fcb4f74b48ddb55e65221995e74e2d00bbff0"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9500e61fce0cfc86168b248104e954fead61f9be213087153d272e817ec7b4f"}, + {file = "ujson-5.10.0-cp313-cp313-win32.whl", hash = "sha256:4c4fc16f11ac1612f05b6f5781b384716719547e142cfd67b65d035bd85af165"}, + {file = "ujson-5.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:4573fd1695932d4f619928fd09d5d03d917274381649ade4328091ceca175539"}, + {file = "ujson-5.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a984a3131da7f07563057db1c3020b1350a3e27a8ec46ccbfbf21e5928a43050"}, + {file = "ujson-5.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73814cd1b9db6fc3270e9d8fe3b19f9f89e78ee9d71e8bd6c9a626aeaeaf16bd"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61e1591ed9376e5eddda202ec229eddc56c612b61ac6ad07f96b91460bb6c2fb"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2c75269f8205b2690db4572a4a36fe47cd1338e4368bc73a7a0e48789e2e35a"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7223f41e5bf1f919cd8d073e35b229295aa8e0f7b5de07ed1c8fddac63a6bc5d"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d4dc2fd6b3067c0782e7002ac3b38cf48608ee6366ff176bbd02cf969c9c20fe"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:232cc85f8ee3c454c115455195a205074a56ff42608fd6b942aa4c378ac14dd7"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cc6139531f13148055d691e442e4bc6601f6dba1e6d521b1585d4788ab0bfad4"}, + {file = "ujson-5.10.0-cp38-cp38-win32.whl", hash = "sha256:e7ce306a42b6b93ca47ac4a3b96683ca554f6d35dd8adc5acfcd55096c8dfcb8"}, + {file = "ujson-5.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:e82d4bb2138ab05e18f089a83b6564fee28048771eb63cdecf4b9b549de8a2cc"}, + {file = "ujson-5.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dfef2814c6b3291c3c5f10065f745a1307d86019dbd7ea50e83504950136ed5b"}, + {file = "ujson-5.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4734ee0745d5928d0ba3a213647f1c4a74a2a28edc6d27b2d6d5bd9fa4319e27"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d47ebb01bd865fdea43da56254a3930a413f0c5590372a1241514abae8aa7c76"}, + {file = 
"ujson-5.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dee5e97c2496874acbf1d3e37b521dd1f307349ed955e62d1d2f05382bc36dd5"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7490655a2272a2d0b072ef16b0b58ee462f4973a8f6bbe64917ce5e0a256f9c0"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba17799fcddaddf5c1f75a4ba3fd6441f6a4f1e9173f8a786b42450851bd74f1"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2aff2985cef314f21d0fecc56027505804bc78802c0121343874741650a4d3d1"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ad88ac75c432674d05b61184178635d44901eb749786c8eb08c102330e6e8996"}, + {file = "ujson-5.10.0-cp39-cp39-win32.whl", hash = "sha256:2544912a71da4ff8c4f7ab5606f947d7299971bdd25a45e008e467ca638d13c9"}, + {file = "ujson-5.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:3ff201d62b1b177a46f113bb43ad300b424b7847f9c5d38b1b4ad8f75d4a282a"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5b6fee72fa77dc172a28f21693f64d93166534c263adb3f96c413ccc85ef6e64"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:61d0af13a9af01d9f26d2331ce49bb5ac1fb9c814964018ac8df605b5422dcb3"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecb24f0bdd899d368b715c9e6664166cf694d1e57be73f17759573a6986dd95a"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbd8fd427f57a03cff3ad6574b5e299131585d9727c8c366da4624a9069ed746"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beeaf1c48e32f07d8820c705ff8e645f8afa690cca1544adba4ebfa067efdc88"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:baed37ea46d756aca2955e99525cc02d9181de67f25515c468856c38d52b5f3b"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7663960f08cd5a2bb152f5ee3992e1af7690a64c0e26d31ba7b3ff5b2ee66337"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8640fb4072d36b08e95a3a380ba65779d356b2fee8696afeb7794cf0902d0a1"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78778a3aa7aafb11e7ddca4e29f46bc5139131037ad628cc10936764282d6753"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0111b27f2d5c820e7f2dbad7d48e3338c824e7ac4d2a12da3dc6061cc39c8e6"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:c66962ca7565605b355a9ed478292da628b8f18c0f2793021ca4425abf8b01e5"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ba43cc34cce49cf2d4bc76401a754a81202d8aa926d0e2b79f0ee258cb15d3a4"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ac56eb983edce27e7f51d05bc8dd820586c6e6be1c5216a6809b0c668bb312b8"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44bd4b23a0e723bf8b10628288c2c7c335161d6840013d4d5de20e48551773b"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c10f4654e5326ec14a46bcdeb2b685d4ada6911050aa8baaf3501e57024b804"}, + {file = 
"ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0de4971a89a762398006e844ae394bd46991f7c385d7a6a3b93ba229e6dac17e"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e1402f0564a97d2a52310ae10a64d25bcef94f8dd643fcf5d310219d915484f7"}, + {file = "ujson-5.10.0.tar.gz", hash = "sha256:b3cd8f3c5d8c7738257f1018880444f7b7d9b66232c64649f562d7ba86ad4bc1"}, +] + [[package]] name = "uri-template" version = "1.3.0" @@ -7645,4 +7775,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "6b57d44b0924bcf64397b3807c2a6ba369166e1d2102b5312c8f8ae2d5323376" +content-hash = "6d578f99d990d462114faecd28a81aa50417bc541d64a67b53063f6c107eb3d3" diff --git a/pyproject.toml b/pyproject.toml index 6fac2597..44ca875a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ asyncpg = {version = "0.30.0", optional = true} pgvector = {version = "^0.3.5", optional = true} psycopg2 = {version = "^2.9.10", optional = true} llama-index-core = {version = "^0.11.22", optional = true} +pymilvus = "^2.5.0" [tool.poetry.extras] filesystem = ["s3fs", "botocore"] From fb5f0cf00fdc1dff830531594c8e6c79504a2bdc Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 3 Dec 2024 10:37:50 +0100 Subject: [PATCH 2/4] chore: Make milvus an optional dependency Make Milvus an optional dependency, expand docs with Milvus information Chore --- .env.template | 2 +- README.md | 12 ++++++++++++ pyproject.toml | 4 ++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.env.template b/.env.template index ff591c0a..75a57de4 100644 --- a/.env.template +++ b/.env.template @@ -14,7 +14,7 @@ GRAPH_DATABASE_URL= GRAPH_DATABASE_USERNAME= GRAPH_DATABASE_PASSWORD= -# "qdrant", "pgvector", "weaviate" or "lancedb" +# "qdrant", "pgvector", "weaviate", "milvus" or "lancedb" VECTOR_DB_PROVIDER="lancedb" # Not needed if using "lancedb" or "pgvector" VECTOR_DB_URL= diff --git a/README.md b/README.md index 2b29f144..efb6e23b 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,12 @@ pip install 'cognee[qdrant]' pip install 'cognee[neo4j]' ``` +### With pip with Milvus support + +```bash +pip install 'cognee[milvus]' +``` + ### With poetry ```bash @@ -83,6 +89,12 @@ poetry add cognee -E qdrant poetry add cognee -E neo4j ``` +### With poetry with Milvus support + +```bash +poetry add cognee -E milvus +``` + ## 💻 Basic Usage diff --git a/pyproject.toml b/pyproject.toml index 44ca875a..c66b23c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ asyncpg = {version = "0.30.0", optional = true} pgvector = {version = "^0.3.5", optional = true} psycopg2 = {version = "^2.9.10", optional = true} llama-index-core = {version = "^0.11.22", optional = true} -pymilvus = "^2.5.0" +pymilvus = {version = "^2.5.0", optional = true} [tool.poetry.extras] filesystem = ["s3fs", "botocore"] @@ -85,7 +85,7 @@ posthog = ["posthog"] falkordb = ["falkordb"] groq = ["groq"] langfuse = ["langfuse"] - +milvus = ["pymilvus"] [tool.poetry.group.dev.dependencies] pytest = "^7.4.0" From 764c0895dfb884e1e054942d6d8af76b5ab57c16 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 3 Dec 2024 11:13:54 +0100 Subject: [PATCH 3/4] fix: Resolve Milvus connection issue, add config to milvus test, add milvus gh action Resolved if statement resolution issue regrading api key, Added vector db config to milvus test, Added milvus gh action Fix --- .github/workflows/test_milvus.yml | 64 +++++++++++++++++++ 
.../databases/vector/milvus/MilvusAdapter.py | 8 ++- cognee/tests/test_milvus.py | 8 +++ poetry.lock | 17 +++-- 4 files changed, 85 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/test_milvus.yml diff --git a/.github/workflows/test_milvus.yml b/.github/workflows/test_milvus.yml new file mode 100644 index 00000000..2cfd8899 --- /dev/null +++ b/.github/workflows/test_milvus.yml @@ -0,0 +1,64 @@ +name: test | milvus + +on: + workflow_dispatch: + pull_request: + branches: + - main + types: [labeled, synchronize] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + RUNTIME__LOG_LEVEL: ERROR + ENV: 'dev' + +jobs: + get_docs_changes: + name: docs changes + uses: ./.github/workflows/get_docs_changes.yml + + run_milvus: + name: test + needs: get_docs_changes + if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + defaults: + run: + shell: bash + + steps: + - name: Check out + uses: actions/checkout@master + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11.x' + + - name: Install Poetry + # https://github.com/snok/install-poetry#running-on-windows + uses: snok/install-poetry@v1.3.2 + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + + - name: Install dependencies + run: poetry install -E milvus --no-interaction + + - name: Run default basic pipeline + env: + ENV: 'dev' + LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: poetry run python ./cognee/tests/test_milvus.py + + - name: Clean up disk space + run: | + sudo rm -rf ~/.cache + sudo rm -rf /tmp/* + df -h diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py index bfc0bbd1..d3774542 100644 --- a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py +++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import asyncio import logging from typing import List, Optional @@ -6,7 +8,6 @@ from ..vector_db_interface import VectorDBInterface from ..models.ScoredResult import ScoredResult from ..embeddings.EmbeddingEngine import EmbeddingEngine -from pymilvus import MilvusClient logger = logging.getLogger("MilvusAdapter") @@ -31,8 +32,9 @@ def __init__(self, url: str, api_key: Optional[str], embedding_engine: Embedding self.embedding_engine = embedding_engine - def get_milvus_client(self) -> MilvusClient: - if self.api_key is not None: + def get_milvus_client(self) -> "MilvusClient": + from pymilvus import MilvusClient + if self.api_key: client = MilvusClient(uri=self.url, token=self.api_key) else: client = MilvusClient(uri=self.url) diff --git a/cognee/tests/test_milvus.py b/cognee/tests/test_milvus.py index d565f644..b32d3590 100644 --- a/cognee/tests/test_milvus.py +++ b/cognee/tests/test_milvus.py @@ -16,6 +16,14 @@ async def main(): pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_milvus")).resolve()) cognee.config.system_root_directory(cognee_directory_path) + cognee.config.set_vector_db_config( + { + "vector_db_url": os.path.join(cognee_directory_path, "databases/milvus.db"), + "vector_db_key": "", + "vector_db_provider": "milvus" + } + ) + await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) diff --git a/poetry.lock 
b/poetry.lock index 4b826264..3611dace 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "aiofiles" @@ -2041,7 +2041,7 @@ typing-extensions = ">=4.7,<5" name = "grpcio" version = "1.67.1" description = "HTTP/2-based RPC framework" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, @@ -2751,8 +2751,6 @@ optional = false python-versions = "*" files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, - {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, - {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, ] [package.dependencies] @@ -3608,7 +3606,7 @@ files = [ name = "milvus-lite" version = "2.4.10" description = "A lightweight version of Milvus wrapped with Python." -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "milvus_lite-2.4.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:fc4246d3ed7d1910847afce0c9ba18212e93a6e9b8406048436940578dfad5cb"}, @@ -4956,7 +4954,7 @@ files = [ name = "protobuf" version = "5.28.3" description = "" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24"}, @@ -5382,7 +5380,7 @@ extra = ["pygments (>=2.12)"] name = "pymilvus" version = "2.5.0" description = "Python Sdk for Milvus" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pymilvus-2.5.0-py3-none-any.whl", hash = "sha256:a0e8653d8fe78019abfda79b3404ef7423f312501e8cbd7dc728051ce8732652"}, @@ -7122,7 +7120,7 @@ files = [ name = "ujson" version = "5.10.0" description = "Ultra fast JSON encoder and decoder for Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"}, @@ -7765,6 +7763,7 @@ groq = ["groq"] langchain = ["langchain_text_splitters", "langsmith"] langfuse = ["langfuse"] llama-index = ["llama-index-core"] +milvus = ["pymilvus"] neo4j = ["neo4j"] notebook = [] postgres = ["asyncpg", "pgvector", "psycopg2"] @@ -7775,4 +7774,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "6d578f99d990d462114faecd28a81aa50417bc541d64a67b53063f6c107eb3d3" +content-hash = "d6b10b74a910202f224ff34fa06ad3d2767796a6492a96724de0d608ac0356c5" From c301498da055ce3f93020c249756d78d0f1dd24b Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Tue, 3 Dec 2024 11:58:34 +0100 Subject: [PATCH 4/4] fix: Fix batch search function Rewrite batch search to work as async gather Fix --- .../databases/vector/milvus/MilvusAdapter.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py index d3774542..84beb727 100644 --- 
a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py +++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py @@ -216,10 +216,15 @@ async def search( raise e async def batch_search(self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False): - def query_search(query_vector): - return self.search(collection_name, query_vector=query_vector, limit=limit, with_vector=with_vectors) - - return [await query_search(query_vector) for query_vector in await self.embed_data(query_texts)] + query_vectors = await self.embed_data(query_texts) + + return await asyncio.gather( + *[self.search(collection_name=collection_name, + query_vector=query_vector, + limit=limit, + with_vector=with_vectors, + ) for query_vector in query_vectors] + ) async def delete_data_points(self, collection_name: str, data_point_ids: list[str]): from pymilvus import MilvusException
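
Note for reviewers trying the new backend outside the test suite: a minimal sketch of selecting Milvus follows. The "milvus" provider string and the set_vector_db_config call are taken from this series; the file path is illustrative. A local path makes pymilvus fall back to embedded Milvus Lite, while a hosted Milvus/Zilliz deployment would use an http(s) URL plus a token in vector_db_key.

```python
import cognee

# Sketch only: route cognee's vector operations through the new MilvusAdapter.
cognee.config.set_vector_db_config({
    "vector_db_url": "./databases/milvus.db",  # illustrative local Milvus Lite file
    "vector_db_key": "",                       # token, only needed for hosted Milvus
    "vector_db_provider": "milvus",
})
```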
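The adapter keeps one collection per vector index, named f"{index_name}_{index_property_name}", each with an id/vector/text schema and a COSINE index. A short pymilvus sketch for inspecting what was created, assuming the Milvus Lite file above and the "entity_name" collection that the test queries directly:

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="./databases/milvus.db")  # same local file as above
print(client.list_collections())                    # e.g. ['entity_name', ...]
print(client.describe_collection("entity_name"))    # shows id / vector / text fields
client.close()
```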
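On the final commit: the earlier batch_search awaited each search inside a list comprehension, so the queries ran strictly one after another; asyncio.gather schedules them concurrently and still returns results in input order. A reduced, Milvus-free illustration of the difference:

```python
import asyncio

async def fake_search(query: str) -> str:  # stand-in for MilvusAdapter.search
    await asyncio.sleep(0.1)               # pretend this is a network round-trip
    return f"hits for {query}"

async def sequential(queries):             # old shape: ~0.1 s per query
    return [await fake_search(q) for q in queries]

async def concurrent(queries):             # new shape: ~0.1 s total
    return await asyncio.gather(*(fake_search(q) for q in queries))

print(asyncio.run(concurrent(["a", "b", "c"])))  # ['hits for a', 'hits for b', 'hits for c']
```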