From 64f54a32c6aed01f0da4b616f46cf9967d20a9c3 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Tue, 6 Aug 2024 18:18:27 +0200 Subject: [PATCH 01/17] Example task extraction --- cognee/api/v1/tasks/graph_ontology/graph_ontology.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 cognee/api/v1/tasks/graph_ontology/graph_ontology.py diff --git a/cognee/api/v1/tasks/graph_ontology/graph_ontology.py b/cognee/api/v1/tasks/graph_ontology/graph_ontology.py new file mode 100644 index 00000000..7a09b4f2 --- /dev/null +++ b/cognee/api/v1/tasks/graph_ontology/graph_ontology.py @@ -0,0 +1,11 @@ + + +from cognee.modules.pipelines.tasks.Task import Task + +from cognee.modules.data.extraction.knowledge_graph.establish_graph_topology import establish_graph_topology +from cognee.shared.data_models import KnowledgeGraph + + + +async def ontology_task(): + return Task(establish_graph_topology, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }) \ No newline at end of file From 82ac9fc26a01acba224d65d59559522155061e9b Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Tue, 6 Aug 2024 18:28:32 +0200 Subject: [PATCH 02/17] Fix to the task --- .../v1/tasks/graph_ontology/graph_ontology.py | 11 ---------- cognee/tasks/graph_ontology/graph_ontology.py | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+), 11 deletions(-) delete mode 100644 cognee/api/v1/tasks/graph_ontology/graph_ontology.py create mode 100644 cognee/tasks/graph_ontology/graph_ontology.py diff --git a/cognee/api/v1/tasks/graph_ontology/graph_ontology.py b/cognee/api/v1/tasks/graph_ontology/graph_ontology.py deleted file mode 100644 index 7a09b4f2..00000000 --- a/cognee/api/v1/tasks/graph_ontology/graph_ontology.py +++ /dev/null @@ -1,11 +0,0 @@ - - -from cognee.modules.pipelines.tasks.Task import Task - -from cognee.modules.data.extraction.knowledge_graph.establish_graph_topology import establish_graph_topology -from cognee.shared.data_models import KnowledgeGraph - - - -async def ontology_task(): - return Task(establish_graph_topology, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }) \ No newline at end of file diff --git a/cognee/tasks/graph_ontology/graph_ontology.py b/cognee/tasks/graph_ontology/graph_ontology.py new file mode 100644 index 00000000..1e502cd6 --- /dev/null +++ b/cognee/tasks/graph_ontology/graph_ontology.py @@ -0,0 +1,22 @@ +from typing import Type +from pydantic import BaseModel + +from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk +from cognee.shared.data_models import KnowledgeGraph +from cognee.infrastructure.databases.graph import get_graph_engine +from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph + + +async def establish_graph_topology(data_chunks: list[DocumentChunk], topology_model: Type[BaseModel]): + if topology_model == KnowledgeGraph: + return data_chunks + + graph_engine = await get_graph_engine() + + await add_model_class_to_graph(topology_model, graph_engine) + + return data_chunks + + +def generate_node_id(node_id: str) -> str: + return node_id.upper().replace(" ", "_").replace("'", "") From 1087a7edda38257277c17e393f7747f8b3de9276 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 7 Aug 2024 07:31:31 +0200 Subject: [PATCH 03/17] Move tasks --- cognee/api/v1/cognify/cognify_v2.py | 25 +- .../classification/classify_text_chunks.py 
| 2 +- .../establish_graph_topology.py | 2 +- .../chunk_extract_summary.py | 38 +++ .../chunk_naive_llm_classifier.py | 152 ++++++++++++ .../chunk_remove_disconnected.py | 32 +++ .../chunk_to_graph_decomposition.py} | 2 +- .../chunk_to_vector_graphstore.py | 97 ++++++++ .../chunk_update_check/chunk_update_check.py | 26 +++ .../graph_decomposition_to_graph_nodes.py | 219 ++++++++++++++++++ .../source_documents_to_chunks.py | 45 ++++ 11 files changed, 629 insertions(+), 11 deletions(-) create mode 100644 cognee/tasks/chunk_extract_summary/chunk_extract_summary.py create mode 100644 cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py create mode 100644 cognee/tasks/chunk_remove_disconnected/chunk_remove_disconnected.py rename cognee/tasks/{graph_ontology/graph_ontology.py => chunk_to_graph_decomposition/chunk_to_graph_decomposition.py} (85%) create mode 100644 cognee/tasks/chunk_to_vector_graphstore/chunk_to_vector_graphstore.py create mode 100644 cognee/tasks/chunk_update_check/chunk_update_check.py create mode 100644 cognee/tasks/graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py create mode 100644 cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 9f2f9539..0c2dc4bb 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -27,6 +27,15 @@ from cognee.modules.users.permissions.methods import check_permissions_on_documents from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status +from cognee.tasks.chunk_extract_summary.chunk_extract_summary import chunk_extract_summary_task +from cognee.tasks.chunk_naive_llm_classifier.chunk_naive_llm_classifier import chunk_naive_llm_classifier_task +from cognee.tasks.chunk_remove_disconnected.chunk_remove_disconnected import chunk_remove_disconnected_task +from cognee.tasks.chunk_to_graph_decomposition.chunk_to_graph_decomposition import chunk_to_graph_decomposition_task +from cognee.tasks.chunk_to_vector_graphstore.chunk_to_vector_graphstore import chunk_to_vector_graphstore_task +from cognee.tasks.chunk_update_check.chunk_update_check import chunk_update_check_task +from cognee.tasks.graph_decomposition_to_graph_nodes.graph_decomposition_to_graph_nodes import \ + graph_decomposition_to_graph_nodes_task +from cognee.tasks.source_documents_to_chunks.source_documents_to_chunks import source_documents_to_chunks logger = logging.getLogger("cognify.v2") @@ -100,26 +109,26 @@ async def run_cognify_pipeline(dataset: Dataset): root_node_id = "ROOT" tasks = [ - Task(process_documents, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type - Task(establish_graph_topology, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data - Task(expand_knowledge_graph, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes - Task(filter_affected_chunks, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks + Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type + 
Task(chunk_to_graph_decomposition_task, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data + Task(graph_decomposition_to_graph_nodes_task, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes + Task(chunk_update_check_task, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks Task( - save_data_chunks, + chunk_to_vector_graphstore_task, collection_name = "chunks", ), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other) run_tasks_parallel([ Task( - summarize_text_chunks, + chunk_extract_summary_task, summarization_model = cognee_config.summarization_model, collection_name = "chunk_summaries", ), # Summarize the document chunks Task( - classify_text_chunks, + chunk_naive_llm_classifier_task, classification_model = cognee_config.classification_model, ), ]), - Task(remove_obsolete_chunks), # Remove the obsolete document chunks. + Task(chunk_remove_disconnected_task), # Remove the obsolete document chunks. ] pipeline = run_tasks(tasks, documents) diff --git a/cognee/modules/classification/classify_text_chunks.py b/cognee/modules/classification/classify_text_chunks.py index 5546b41e..1e6be7e8 100644 --- a/cognee/modules/classification/classify_text_chunks.py +++ b/cognee/modules/classification/classify_text_chunks.py @@ -8,7 +8,7 @@ from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk from ..data.extraction.extract_categories import extract_categories -async def classify_text_chunks(data_chunks: list[DocumentChunk], classification_model: Type[BaseModel]): +async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classification_model: Type[BaseModel]): if len(data_chunks) == 0: return data_chunks diff --git a/cognee/modules/data/extraction/knowledge_graph/establish_graph_topology.py b/cognee/modules/data/extraction/knowledge_graph/establish_graph_topology.py index ce061874..b2141a50 100644 --- a/cognee/modules/data/extraction/knowledge_graph/establish_graph_topology.py +++ b/cognee/modules/data/extraction/knowledge_graph/establish_graph_topology.py @@ -5,7 +5,7 @@ from ...processing.chunk_types.DocumentChunk import DocumentChunk from .add_model_class_to_graph import add_model_class_to_graph -async def establish_graph_topology(data_chunks: list[DocumentChunk], topology_model: Type[BaseModel]): +async def chunk_to_graph_decomposition(data_chunks: list[DocumentChunk], topology_model: Type[BaseModel]): if topology_model == KnowledgeGraph: return data_chunks diff --git a/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py b/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py new file mode 100644 index 00000000..f0c74c19 --- /dev/null +++ b/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py @@ -0,0 +1,38 @@ + +import asyncio +from typing import Type +from pydantic import BaseModel +from cognee.infrastructure.databases.vector import get_vector_engine, DataPoint +from cognee.modules.data.extraction.data_summary.models.TextSummary import TextSummary +from cognee.modules.data.extraction.extract_summary import extract_summary +from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk + + +async def chunk_extract_summary_task(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel], collection_name: str = "summaries"): + if 
len(data_chunks) == 0: + return data_chunks + + chunk_summaries = await asyncio.gather( + *[extract_summary(chunk.text, summarization_model) for chunk in data_chunks] + ) + + vector_engine = get_vector_engine() + + await vector_engine.create_collection(collection_name, payload_schema = TextSummary) + + await vector_engine.create_data_points( + collection_name, + [ + DataPoint[TextSummary]( + id = str(chunk.chunk_id), + payload = dict( + chunk_id = str(chunk.chunk_id), + document_id = str(chunk.document_id), + text = chunk_summaries[chunk_index].summary, + ), + embed_field = "text", + ) for (chunk_index, chunk) in enumerate(data_chunks) + ], + ) + + return data_chunks diff --git a/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py b/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py new file mode 100644 index 00000000..4e75f87e --- /dev/null +++ b/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py @@ -0,0 +1,152 @@ +import asyncio +from uuid import uuid5, NAMESPACE_OID +from typing import Type +from pydantic import BaseModel +from cognee.infrastructure.databases.graph import get_graph_engine +from cognee.infrastructure.databases.vector import get_vector_engine, DataPoint +from cognee.modules.data.extraction.extract_categories import extract_categories +from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk + + +async def chunk_naive_llm_classifier_task(data_chunks: list[DocumentChunk], classification_model: Type[BaseModel]): + if len(data_chunks) == 0: + return data_chunks + + chunk_classifications = await asyncio.gather( + *[extract_categories(chunk.text, classification_model) for chunk in data_chunks], + ) + + classification_data_points = [] + + for chunk_index, chunk in enumerate(data_chunks): + chunk_classification = chunk_classifications[chunk_index] + classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type)) + classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type)) + + for classification_subclass in chunk_classification.label.subclass: + classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value)) + + vector_engine = get_vector_engine() + + class Keyword(BaseModel): + uuid: str + text: str + chunk_id: str + document_id: str + + collection_name = "classification" + + if await vector_engine.has_collection(collection_name): + existing_data_points = await vector_engine.retrieve( + collection_name, + list(set(classification_data_points)), + ) if len(classification_data_points) > 0 else [] + + existing_points_map = {point.id: True for point in existing_data_points} + else: + existing_points_map = {} + await vector_engine.create_collection(collection_name, payload_schema=Keyword) + + data_points = [] + nodes = [] + edges = [] + + for (chunk_index, data_chunk) in enumerate(data_chunks): + chunk_classification = chunk_classifications[chunk_index] + classification_type_label = chunk_classification.label.type + classification_type_id = uuid5(NAMESPACE_OID, classification_type_label) + + if classification_type_id not in existing_points_map: + data_points.append( + DataPoint[Keyword]( + id=str(classification_type_id), + payload=Keyword.parse_obj({ + "uuid": str(classification_type_id), + "text": classification_type_label, + "chunk_id": str(data_chunk.chunk_id), + "document_id": str(data_chunk.document_id), + }), + embed_field="text", + ) + ) + + nodes.append(( + str(classification_type_id), + dict( + 
id=str(classification_type_id), + name=classification_type_label, + type=classification_type_label, + ) + )) + existing_points_map[classification_type_id] = True + + edges.append(( + str(data_chunk.chunk_id), + str(classification_type_id), + "is_media_type", + dict( + relationship_name="is_media_type", + source_node_id=str(data_chunk.chunk_id), + target_node_id=str(classification_type_id), + ), + )) + + for classification_subclass in chunk_classification.label.subclass: + classification_subtype_label = classification_subclass.value + classification_subtype_id = uuid5(NAMESPACE_OID, classification_subtype_label) + + if classification_subtype_id not in existing_points_map: + data_points.append( + DataPoint[Keyword]( + id=str(classification_subtype_id), + payload=Keyword.parse_obj({ + "uuid": str(classification_subtype_id), + "text": classification_subtype_label, + "chunk_id": str(data_chunk.chunk_id), + "document_id": str(data_chunk.document_id), + }), + embed_field="text", + ) + ) + + nodes.append(( + str(classification_subtype_id), + dict( + id=str(classification_subtype_id), + name=classification_subtype_label, + type=classification_subtype_label, + ) + )) + edges.append(( + str(classification_subtype_id), + str(classification_type_id), + "is_subtype_of", + dict( + relationship_name="contains", + source_node_id=str(classification_type_id), + target_node_id=str(classification_subtype_id), + ), + )) + + existing_points_map[classification_subtype_id] = True + + edges.append(( + str(data_chunk.chunk_id), + str(classification_subtype_id), + "is_classified_as", + dict( + relationship_name="is_classified_as", + source_node_id=str(data_chunk.chunk_id), + target_node_id=str(classification_subtype_id), + ), + )) + + if len(nodes) > 0 or len(edges) > 0: + await vector_engine.create_data_points(collection_name, data_points) + + graph_engine = await get_graph_engine() + + await graph_engine.add_nodes(nodes) + await graph_engine.add_edges(edges) + + return data_chunks diff --git a/cognee/tasks/chunk_remove_disconnected/chunk_remove_disconnected.py b/cognee/tasks/chunk_remove_disconnected/chunk_remove_disconnected.py new file mode 100644 index 00000000..0f046987 --- /dev/null +++ b/cognee/tasks/chunk_remove_disconnected/chunk_remove_disconnected.py @@ -0,0 +1,32 @@ + +from cognee.infrastructure.databases.graph import get_graph_engine +from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk + + +# from cognee.infrastructure.databases.vector import get_vector_engine + + +async def chunk_remove_disconnected_task(data_chunks: list[DocumentChunk]) -> list[DocumentChunk]: + graph_engine = await get_graph_engine() + + document_ids = set((data_chunk.document_id for data_chunk in data_chunks)) + + obsolete_chunk_ids = [] + + for document_id in document_ids: + chunk_ids = await graph_engine.get_successor_ids(document_id, edge_label = "has_chunk") + + for chunk_id in chunk_ids: + previous_chunks = await graph_engine.get_predecessor_ids(chunk_id, edge_label = "next_chunk") + + if len(previous_chunks) == 0: + obsolete_chunk_ids.append(chunk_id) + + if len(obsolete_chunk_ids) > 0: + await graph_engine.delete_nodes(obsolete_chunk_ids) + + disconnected_nodes = await graph_engine.get_disconnected_nodes() + if len(disconnected_nodes) > 0: + await graph_engine.delete_nodes(disconnected_nodes) + + return data_chunks diff --git a/cognee/tasks/graph_ontology/graph_ontology.py b/cognee/tasks/chunk_to_graph_decomposition/chunk_to_graph_decomposition.py similarity index 85% rename from 
cognee/tasks/graph_ontology/graph_ontology.py rename to cognee/tasks/chunk_to_graph_decomposition/chunk_to_graph_decomposition.py index 1e502cd6..6f9c936d 100644 --- a/cognee/tasks/graph_ontology/graph_ontology.py +++ b/cognee/tasks/chunk_to_graph_decomposition/chunk_to_graph_decomposition.py @@ -7,7 +7,7 @@ from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph -async def establish_graph_topology(data_chunks: list[DocumentChunk], topology_model: Type[BaseModel]): +async def chunk_to_graph_decomposition_task(data_chunks: list[DocumentChunk], topology_model: Type[BaseModel]): if topology_model == KnowledgeGraph: return data_chunks diff --git a/cognee/tasks/chunk_to_vector_graphstore/chunk_to_vector_graphstore.py b/cognee/tasks/chunk_to_vector_graphstore/chunk_to_vector_graphstore.py new file mode 100644 index 00000000..aceb879f --- /dev/null +++ b/cognee/tasks/chunk_to_vector_graphstore/chunk_to_vector_graphstore.py @@ -0,0 +1,97 @@ +from cognee.infrastructure.databases.vector import DataPoint, get_vector_engine +from cognee.infrastructure.databases.graph import get_graph_engine +from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk + +async def chunk_to_vector_graphstore_task(data_chunks: list[DocumentChunk], collection_name: str): + if len(data_chunks) == 0: + return data_chunks + + vector_engine = get_vector_engine() + graph_engine = await get_graph_engine() + + # Remove and unlink existing chunks + if await vector_engine.has_collection(collection_name): + existing_chunks = [DocumentChunk.parse_obj(chunk.payload) for chunk in (await vector_engine.retrieve( + collection_name, + [str(chunk.chunk_id) for chunk in data_chunks], + ))] + + if len(existing_chunks) > 0: + await vector_engine.delete_data_points(collection_name, [str(chunk.chunk_id) for chunk in existing_chunks]) + + await graph_engine.remove_connection_to_successors_of([chunk.chunk_id for chunk in existing_chunks], "next_chunk") + await graph_engine.remove_connection_to_predecessors_of([chunk.chunk_id for chunk in existing_chunks], "has_chunk") + else: + await vector_engine.create_collection(collection_name, payload_schema = DocumentChunk) + + # Add to vector storage + await vector_engine.create_data_points( + collection_name, + [ + DataPoint[DocumentChunk]( + id = str(chunk.chunk_id), + payload = chunk, + embed_field = "text", + ) for chunk in data_chunks + ], + ) + + # Add to graph storage + chunk_nodes = [] + chunk_edges = [] + + for chunk in data_chunks: + chunk_nodes.append(( + str(chunk.chunk_id), + dict( + id = str(chunk.chunk_id), + chunk_id = str(chunk.chunk_id), + document_id = str(chunk.document_id), + word_count = chunk.word_count, + chunk_index = chunk.chunk_index, + cut_type = chunk.cut_type, + pages = chunk.pages, + ) + )) + + chunk_edges.append(( + str(chunk.document_id), + str(chunk.chunk_id), + "has_chunk", + dict( + relationship_name = "has_chunk", + source_node_id = str(chunk.document_id), + target_node_id = str(chunk.chunk_id), + ), + )) + + previous_chunk_id = get_previous_chunk_id(data_chunks, chunk) + + if previous_chunk_id is not None: + chunk_edges.append(( + str(previous_chunk_id), + str(chunk.chunk_id), + "next_chunk", + dict( + relationship_name = "next_chunk", + source_node_id = str(previous_chunk_id), + target_node_id = str(chunk.chunk_id), + ), + )) + + await graph_engine.add_nodes(chunk_nodes) + await graph_engine.add_edges(chunk_edges) + + return data_chunks + + +def get_previous_chunk_id(document_chunks: 
list[DocumentChunk], current_chunk: DocumentChunk) -> DocumentChunk: + if current_chunk.chunk_index == 0: + return current_chunk.document_id + + for chunk in document_chunks: + if str(chunk.document_id) == str(current_chunk.document_id) \ + and chunk.chunk_index == current_chunk.chunk_index - 1: + return chunk.chunk_id + + return None diff --git a/cognee/tasks/chunk_update_check/chunk_update_check.py b/cognee/tasks/chunk_update_check/chunk_update_check.py new file mode 100644 index 00000000..cd532b68 --- /dev/null +++ b/cognee/tasks/chunk_update_check/chunk_update_check.py @@ -0,0 +1,26 @@ +from cognee.infrastructure.databases.vector import get_vector_engine +from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk + + +async def chunk_update_check_task(data_chunks: list[DocumentChunk], collection_name: str) -> list[DocumentChunk]: + vector_engine = get_vector_engine() + + if not await vector_engine.has_collection(collection_name): + # If collection doesn't exist, all data_chunks are new + return data_chunks + + existing_chunks = await vector_engine.retrieve( + collection_name, + [str(chunk.chunk_id) for chunk in data_chunks], + ) + + existing_chunks_map = {chunk.id: chunk.payload for chunk in existing_chunks} + + affected_data_chunks = [] + + for chunk in data_chunks: + if chunk.chunk_id not in existing_chunks_map or \ + chunk.text != existing_chunks_map[chunk.chunk_id]["text"]: + affected_data_chunks.append(chunk) + + return affected_data_chunks diff --git a/cognee/tasks/graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py b/cognee/tasks/graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py new file mode 100644 index 00000000..2c7539e3 --- /dev/null +++ b/cognee/tasks/graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py @@ -0,0 +1,219 @@ +import json +import asyncio +from uuid import uuid5, NAMESPACE_OID +from datetime import datetime, timezone +from typing import Type +from pydantic import BaseModel +from cognee.infrastructure.databases.graph import get_graph_engine +from cognee.infrastructure.databases.vector import DataPoint, get_vector_engine +from cognee.modules.data.extraction.knowledge_graph.extract_content_graph import extract_content_graph +from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk + + +class EntityNode(BaseModel): + uuid: str + name: str + type: str + description: str + created_at: datetime + updated_at: datetime + +async def graph_decomposition_to_graph_nodes_task(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel], collection_name: str): + chunk_graphs = await asyncio.gather( + *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks] + ) + + vector_engine = get_vector_engine() + graph_engine = await get_graph_engine() + + has_collection = await vector_engine.has_collection(collection_name) + + if not has_collection: + await vector_engine.create_collection(collection_name, payload_schema = EntityNode) + + processed_nodes = {} + type_node_edges = [] + entity_node_edges = [] + type_entity_edges = [] + + for (chunk_index, chunk) in enumerate(data_chunks): + chunk_graph = chunk_graphs[chunk_index] + for node in chunk_graph.nodes: + type_node_id = generate_node_id(node.type) + entity_node_id = generate_node_id(node.id) + + if type_node_id not in processed_nodes: + type_node_edges.append((str(chunk.chunk_id), type_node_id, "contains_entity_type")) + processed_nodes[type_node_id] = True + + if entity_node_id not in processed_nodes: 
+ entity_node_edges.append((str(chunk.chunk_id), entity_node_id, "contains_entity")) + type_entity_edges.append((entity_node_id, type_node_id, "is_entity_type")) + processed_nodes[entity_node_id] = True + + graph_node_edges = [ + (edge.source_node_id, edge.target_node_id, edge.relationship_name) \ + for edge in chunk_graph.edges + ] + + existing_edges = await graph_engine.has_edges([ + *type_node_edges, + *entity_node_edges, + *type_entity_edges, + *graph_node_edges, + ]) + + existing_edges_map = {} + existing_nodes_map = {} + + for edge in existing_edges: + existing_edges_map[edge[0] + edge[1] + edge[2]] = True + existing_nodes_map[edge[0]] = True + + graph_nodes = [] + graph_edges = [] + data_points = [] + + for (chunk_index, chunk) in enumerate(data_chunks): + graph = chunk_graphs[chunk_index] + if graph is None: + continue + + for node in graph.nodes: + node_id = generate_node_id(node.id) + node_name = generate_name(node.name) + + type_node_id = generate_node_id(node.type) + type_node_name = generate_name(node.type) + + if node_id not in existing_nodes_map: + node_data = dict( + uuid = node_id, + name = node_name, + type = node_name, + description = node.description, + created_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), + updated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), + ) + + graph_nodes.append(( + node_id, + dict( + **node_data, + properties = json.dumps(node.properties), + ) + )) + + data_points.append(DataPoint[EntityNode]( + id = str(uuid5(NAMESPACE_OID, node_id)), + payload = node_data, + embed_field = "name", + )) + + existing_nodes_map[node_id] = True + + edge_key = str(chunk.chunk_id) + node_id + "contains_entity" + + if edge_key not in existing_edges_map: + graph_edges.append(( + str(chunk.chunk_id), + node_id, + "contains_entity", + dict( + relationship_name = "contains_entity", + source_node_id = str(chunk.chunk_id), + target_node_id = node_id, + ), + )) + + # Add relationship between entity type and entity itself: "Jake is Person" + graph_edges.append(( + node_id, + type_node_id, + "is_entity_type", + dict( + relationship_name = "is_entity_type", + source_node_id = type_node_id, + target_node_id = node_id, + ), + )) + + existing_edges_map[edge_key] = True + + if type_node_id not in existing_nodes_map: + type_node_data = dict( + uuid = type_node_id, + name = type_node_name, + type = type_node_id, + description = type_node_name, + created_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), + updated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), + ) + + graph_nodes.append((type_node_id, dict( + **type_node_data, + properties = json.dumps(node.properties) + ))) + + data_points.append(DataPoint[EntityNode]( + id = str(uuid5(NAMESPACE_OID, type_node_id)), + payload = type_node_data, + embed_field = "name", + )) + + existing_nodes_map[type_node_id] = True + + edge_key = str(chunk.chunk_id) + type_node_id + "contains_entity_type" + + if edge_key not in existing_edges_map: + graph_edges.append(( + str(chunk.chunk_id), + type_node_id, + "contains_entity_type", + dict( + relationship_name = "contains_entity_type", + source_node_id = str(chunk.chunk_id), + target_node_id = type_node_id, + ), + )) + + existing_edges_map[edge_key] = True + + # Add relationship that came from graphs. 
+ for edge in graph.edges: + source_node_id = generate_node_id(edge.source_node_id) + target_node_id = generate_node_id(edge.target_node_id) + relationship_name = generate_name(edge.relationship_name) + edge_key = source_node_id + target_node_id + relationship_name + + if edge_key not in existing_edges_map: + graph_edges.append(( + generate_node_id(edge.source_node_id), + generate_node_id(edge.target_node_id), + edge.relationship_name, + dict( + relationship_name = generate_name(edge.relationship_name), + source_node_id = generate_node_id(edge.source_node_id), + target_node_id = generate_node_id(edge.target_node_id), + properties = json.dumps(edge.properties), + ), + )) + existing_edges_map[edge_key] = True + + if len(data_points) > 0: + await vector_engine.create_data_points(collection_name, data_points) + + if len(graph_nodes) > 0: + await graph_engine.add_nodes(graph_nodes) + + if len(graph_edges) > 0: + await graph_engine.add_edges(graph_edges) + + return data_chunks + + +def generate_name(name: str) -> str: + return name.lower().replace(" ", "_").replace("'", "") + +def generate_node_id(node_id: str) -> str: + return node_id.lower().replace(" ", "_").replace("'", "") diff --git a/cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py b/cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py new file mode 100644 index 00000000..116f020a --- /dev/null +++ b/cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py @@ -0,0 +1,45 @@ +from cognee.infrastructure.databases.graph import get_graph_engine +from cognee.modules.data.processing.document_types.Document import Document + + +async def source_documents_to_chunks(documents: list[Document], parent_node_id: str = None, user:str=None, user_permissions:str=None): + graph_engine = await get_graph_engine() + + nodes = [] + edges = [] + + if parent_node_id and await graph_engine.extract_node(parent_node_id) is None: + nodes.append((parent_node_id, {})) + + document_nodes = await graph_engine.extract_nodes([str(document.id) for document in documents]) + + for (document_index, document) in enumerate(documents): + document_node = document_nodes[document_index] if document_index in document_nodes else None + + if document_node is None: + document_dict = document.to_dict() + document_dict["user"] = user + document_dict["user_permissions"] = user_permissions + nodes.append((str(document.id), document.to_dict())) + + if parent_node_id: + edges.append(( + parent_node_id, + str(document.id), + "has_document", + dict( + relationship_name = "has_document", + source_node_id = parent_node_id, + target_node_id = str(document.id), + ), + )) + + if len(nodes) > 0: + await graph_engine.add_nodes(nodes) + await graph_engine.add_edges(edges) + + for document in documents: + document_reader = document.get_reader() + + for document_chunk in document_reader.read(max_chunk_size = 1024): + yield document_chunk From c4c5bf91281d73fc467101f75d5c33b7e5071ec6 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 7 Aug 2024 10:50:53 +0200 Subject: [PATCH 04/17] Move tasks --- cognee/tasks/chunk_extract_summary/chunk_extract_summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py b/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py index f0c74c19..ffb16f6d 100644 --- a/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py +++ b/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py @@ 
-18,7 +18,7 @@ async def chunk_extract_summary_task(data_chunks: list[DocumentChunk], summariza vector_engine = get_vector_engine() - await vector_engine.create_collection(collection_name, payload_schema = TextSummary) + await vector_engine.create_collection(collection_name, payload_schema=TextSummary) await vector_engine.create_data_points( collection_name, From a782286b932338ec3f2bb4036c9cc0e4f4dd12e4 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 7 Aug 2024 12:36:02 +0200 Subject: [PATCH 05/17] Task updates and updates to SQLAlchemy Adapter --- cognee/api/v1/cognify/cognify_v2.py | 16 +- .../sqlalchemy/SqlAlchemyAdapter.py | 11 +- poetry.lock | 535 +++++++++--------- 3 files changed, 286 insertions(+), 276 deletions(-) diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 0c2dc4bb..95e56108 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -9,14 +9,14 @@ from cognee.modules.data.processing.document_types.ImageDocument import ImageDocument from cognee.shared.data_models import KnowledgeGraph from cognee.modules.data.processing.document_types import PdfDocument, TextDocument -from cognee.modules.cognify.vector import save_data_chunks -from cognee.modules.data.processing.process_documents import process_documents -from cognee.modules.classification.classify_text_chunks import classify_text_chunks -from cognee.modules.data.extraction.data_summary.summarize_text_chunks import summarize_text_chunks -from cognee.modules.data.processing.filter_affected_chunks import filter_affected_chunks -from cognee.modules.data.processing.remove_obsolete_chunks import remove_obsolete_chunks -from cognee.modules.data.extraction.knowledge_graph.expand_knowledge_graph import expand_knowledge_graph -from cognee.modules.data.extraction.knowledge_graph.establish_graph_topology import establish_graph_topology +# from cognee.modules.cognify.vector import save_data_chunks +# from cognee.modules.data.processing.process_documents import process_documents +# from cognee.modules.classification.classify_text_chunks import classify_text_chunks +# from cognee.modules.data.extraction.data_summary.summarize_text_chunks import summarize_text_chunks +# from cognee.modules.data.processing.filter_affected_chunks import filter_affected_chunks +# from cognee.modules.data.processing.remove_obsolete_chunks import remove_obsolete_chunks +# from cognee.modules.data.extraction.knowledge_graph.expand_knowledge_graph import expand_knowledge_graph +# from cognee.modules.data.extraction.knowledge_graph.establish_graph_topology import establish_graph_topology from cognee.modules.data.models import Dataset, Data from cognee.modules.data.operations.get_dataset_data import get_dataset_data from cognee.modules.data.operations.retrieve_datasets import retrieve_datasets diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index c4b0a1e0..799c79d8 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -58,7 +58,7 @@ async def create_table(self, schema_name: str, table_name: str, table_config: li async def delete_table(self, table_name: str): async with self.engine.connect() as connection: - await connection.execute(text(f"DROP TABLE IF EXISTS {table_name};")) + await 
connection.execute(text(f"DROP TABLE IF EXISTS {table_name} CASCADE;")) async def insert_data(self, schema_name: str, table_name: str, data: list[dict]): columns = ", ".join(data[0].keys()) @@ -101,9 +101,10 @@ async def drop_tables(self, connection): async def delete_database(self): async with self.engine.connect() as connection: try: - async with self.engine.begin() as connection: - await connection.run_sync(Base.metadata.drop_all) - + async with connection.begin() as trans: + for table in Base.metadata.sorted_tables: + drop_table_query = text(f'DROP TABLE IF EXISTS {table.name} CASCADE') + await connection.execute(drop_table_query) print("Database deleted successfully.") except Exception as e: - print(f"Error deleting database: {e}") + print(f"Error deleting database: {e}") \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index c5660ca5..011b0db8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiofiles" @@ -427,13 +427,13 @@ test = ["flake8 (>=6.1,<7.0)", "uvloop (>=0.15.3)"] [[package]] name = "attrs" -version = "24.1.0" +version = "24.2.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.7" files = [ - {file = "attrs-24.1.0-py3-none-any.whl", hash = "sha256:377b47448cb61fea38533f671fba0d0f8a96fd58facd4dc518e3dac9dbea0905"}, - {file = "attrs-24.1.0.tar.gz", hash = "sha256:adbdec84af72d38be7628e353a09b6a6790d15cd71819f6e9d7b0faa8a125745"}, + {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, + {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, ] [package.extras] @@ -621,17 +621,17 @@ css = ["tinycss2 (>=1.1.0,<1.3)"] [[package]] name = "boto3" -version = "1.34.154" +version = "1.34.155" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.154-py3-none-any.whl", hash = "sha256:7ca22adef4c77ee128e1e1dc7d48bc9512a87cc6fe3d771b3f913d5ecd41c057"}, - {file = "boto3-1.34.154.tar.gz", hash = "sha256:864f06528c583dc7b02adf12db395ecfadbf9cb0da90e907e848ffb27128ce19"}, + {file = "boto3-1.34.155-py3-none-any.whl", hash = "sha256:445239ea2ba7f4084ddbd71f721c14d0a6d08e06f6ba51b5403a16b6544b3f1e"}, + {file = "boto3-1.34.155.tar.gz", hash = "sha256:e8d2e128c74e84199edccdc3a6b4b1c6fb36d6fdb5688eb92931827f02c6fa5b"}, ] [package.dependencies] -botocore = ">=1.34.154,<1.35.0" +botocore = ">=1.34.155,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -640,13 +640,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.154" +version = "1.34.155" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.154-py3-none-any.whl", hash = "sha256:4eef4b1bb809b382ba9dc9c88f5fcc4a133f221a1acb693ee6bee4de9f325979"}, - {file = "botocore-1.34.154.tar.gz", hash = "sha256:64d9b4c85a504d77cb56dabb2ad717cd8e1717424a88edb458b01d1e5797262a"}, + {file = "botocore-1.34.155-py3-none-any.whl", hash = "sha256:f2696c11bb0cad627d42512937befd2e3f966aedd15de00d90ee13cf7a16b328"}, + {file = "botocore-1.34.155.tar.gz", hash = "sha256:3aa88abfef23909f68d3e6679a3d4b4bb3c6288a6cfbf9e253aa68dac8edad64"}, ] [package.dependencies] @@ -658,7 +658,7 @@ urllib3 = [ ] [package.extras] -crt = ["awscrt (==0.20.11)"] +crt = ["awscrt (==0.21.2)"] [[package]] name = "cachetools" @@ -725,63 +725,78 @@ files = [ [[package]] name = "cffi" -version = "1.16.0" +version = "1.17.0" description = "Foreign Function Interface for Python calling C code." optional = false python-versions = ">=3.8" files = [ - {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, - {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, - {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, - {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, - {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, - {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, - {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, - {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, - {file = 
"cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, - {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, - {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, - {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, - {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, - {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, - {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, - {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, - {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, - {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, - {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, - {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = 
"sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, - {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, - {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, - {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, - {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, + {file = "cffi-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f9338cc05451f1942d0d8203ec2c346c830f8e86469903d5126c1f0a13a2bcbb"}, + {file = "cffi-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0ce71725cacc9ebf839630772b07eeec220cbb5f03be1399e0457a1464f8e1a"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c815270206f983309915a6844fe994b2fa47e5d05c4c4cef267c3b30e34dbe42"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6bdcd415ba87846fd317bee0774e412e8792832e7805938987e4ede1d13046d"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a98748ed1a1df4ee1d6f927e151ed6c1a09d5ec21684de879c7ea6aa96f58f2"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a048d4f6630113e54bb4b77e315e1ba32a5a31512c31a273807d0027a7e69ab"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24aa705a5f5bd3a8bcfa4d123f03413de5d86e497435693b638cbffb7d5d8a1b"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:856bf0924d24e7f93b8aee12a3a1095c34085600aa805693fb7f5d1962393206"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4304d4416ff032ed50ad6bb87416d802e67139e31c0bde4628f36a47a3164bfa"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:331ad15c39c9fe9186ceaf87203a9ecf5ae0ba2538c9e898e3a6967e8ad3db6f"}, + {file = "cffi-1.17.0-cp310-cp310-win32.whl", hash = "sha256:669b29a9eca6146465cc574659058ed949748f0809a2582d1f1a324eb91054dc"}, + {file = "cffi-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:48b389b1fd5144603d61d752afd7167dfd205973a43151ae5045b35793232aa2"}, + {file = "cffi-1.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5d97162c196ce54af6700949ddf9409e9833ef1003b4741c2b39ef46f1d9720"}, + {file = "cffi-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ba5c243f4004c750836f81606a9fcb7841f8874ad8f3bf204ff5e56332b72b9"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb9333f58fc3a2296fb1d54576138d4cf5d496a2cc118422bd77835e6ae0b9cb"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:435a22d00ec7d7ea533db494da8581b05977f9c37338c80bc86314bec2619424"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1df34588123fcc88c872f5acb6f74ae59e9d182a2707097f9e28275ec26a12d"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df8bb0010fdd0a743b7542589223a2816bdde4d94bb5ad67884348fa2c1c67e8"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8b5b9712783415695663bd463990e2f00c6750562e6ad1d28e072a611c5f2a6"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ffef8fd58a36fb5f1196919638f73dd3ae0db1a878982b27a9a5a176ede4ba91"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e67d26532bfd8b7f7c05d5a766d6f437b362c1bf203a3a5ce3593a645e870b8"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45f7cd36186db767d803b1473b3c659d57a23b5fa491ad83c6d40f2af58e4dbb"}, + {file = "cffi-1.17.0-cp311-cp311-win32.whl", hash = "sha256:a9015f5b8af1bb6837a3fcb0cdf3b874fe3385ff6274e8b7925d81ccaec3c5c9"}, + {file = "cffi-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:b50aaac7d05c2c26dfd50c3321199f019ba76bb650e346a6ef3616306eed67b0"}, + {file = "cffi-1.17.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aec510255ce690d240f7cb23d7114f6b351c733a74c279a84def763660a2c3bc"}, + {file = "cffi-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2770bb0d5e3cc0e31e7318db06efcbcdb7b31bcb1a70086d3177692a02256f59"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db9a30ec064129d605d0f1aedc93e00894b9334ec74ba9c6bdd08147434b33eb"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a47eef975d2b8b721775a0fa286f50eab535b9d56c70a6e62842134cf7841195"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f3e0992f23bbb0be00a921eae5363329253c3b86287db27092461c887b791e5e"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6107e445faf057c118d5050560695e46d272e5301feffda3c41849641222a828"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb862356ee9391dc5a0b3cbc00f416b48c1b9a52d252d898e5b7696a5f9fe150"}, + {file = "cffi-1.17.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c1c13185b90bbd3f8b5963cd8ce7ad4ff441924c31e23c975cb150e27c2bf67a"}, + {file = 
"cffi-1.17.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:17c6d6d3260c7f2d94f657e6872591fe8733872a86ed1345bda872cfc8c74885"}, + {file = "cffi-1.17.0-cp312-cp312-win32.whl", hash = "sha256:c3b8bd3133cd50f6b637bb4322822c94c5ce4bf0d724ed5ae70afce62187c492"}, + {file = "cffi-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:dca802c8db0720ce1c49cce1149ff7b06e91ba15fa84b1d59144fef1a1bc7ac2"}, + {file = "cffi-1.17.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6ce01337d23884b21c03869d2f68c5523d43174d4fc405490eb0091057943118"}, + {file = "cffi-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cab2eba3830bf4f6d91e2d6718e0e1c14a2f5ad1af68a89d24ace0c6b17cced7"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:14b9cbc8f7ac98a739558eb86fabc283d4d564dafed50216e7f7ee62d0d25377"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b00e7bcd71caa0282cbe3c90966f738e2db91e64092a877c3ff7f19a1628fdcb"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41f4915e09218744d8bae14759f983e466ab69b178de38066f7579892ff2a555"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4760a68cab57bfaa628938e9c2971137e05ce48e762a9cb53b76c9b569f1204"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:011aff3524d578a9412c8b3cfaa50f2c0bd78e03eb7af7aa5e0df59b158efb2f"}, + {file = "cffi-1.17.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:a003ac9edc22d99ae1286b0875c460351f4e101f8c9d9d2576e78d7e048f64e0"}, + {file = "cffi-1.17.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ef9528915df81b8f4c7612b19b8628214c65c9b7f74db2e34a646a0a2a0da2d4"}, + {file = "cffi-1.17.0-cp313-cp313-win32.whl", hash = "sha256:70d2aa9fb00cf52034feac4b913181a6e10356019b18ef89bc7c12a283bf5f5a"}, + {file = "cffi-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:b7b6ea9e36d32582cda3465f54c4b454f62f23cb083ebc7a94e2ca6ef011c3a7"}, + {file = "cffi-1.17.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:964823b2fc77b55355999ade496c54dde161c621cb1f6eac61dc30ed1b63cd4c"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:516a405f174fd3b88829eabfe4bb296ac602d6a0f68e0d64d5ac9456194a5b7e"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dec6b307ce928e8e112a6bb9921a1cb00a0e14979bf28b98e084a4b8a742bd9b"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4094c7b464cf0a858e75cd14b03509e84789abf7b79f8537e6a72152109c76e"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2404f3de742f47cb62d023f0ba7c5a916c9c653d5b368cc966382ae4e57da401"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa9d43b02a0c681f0bfbc12d476d47b2b2b6a3f9287f11ee42989a268a1833c"}, + {file = "cffi-1.17.0-cp38-cp38-win32.whl", hash = "sha256:0bb15e7acf8ab35ca8b24b90af52c8b391690ef5c4aec3d31f38f0d37d2cc499"}, + {file = "cffi-1.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:93a7350f6706b31f457c1457d3a3259ff9071a66f312ae64dc024f049055f72c"}, + {file = "cffi-1.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1a2ddbac59dc3716bc79f27906c010406155031a1c801410f1bafff17ea304d2"}, + {file = 
"cffi-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6327b572f5770293fc062a7ec04160e89741e8552bf1c358d1a23eba68166759"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbc183e7bef690c9abe5ea67b7b60fdbca81aa8da43468287dae7b5c046107d4"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bdc0f1f610d067c70aa3737ed06e2726fd9d6f7bfee4a351f4c40b6831f4e82"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6d872186c1617d143969defeadac5a904e6e374183e07977eedef9c07c8953bf"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d46ee4764b88b91f16661a8befc6bfb24806d885e27436fdc292ed7e6f6d058"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f76a90c345796c01d85e6332e81cab6d70de83b829cf1d9762d0a3da59c7932"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0e60821d312f99d3e1569202518dddf10ae547e799d75aef3bca3a2d9e8ee693"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:eb09b82377233b902d4c3fbeeb7ad731cdab579c6c6fda1f763cd779139e47c3"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:24658baf6224d8f280e827f0a50c46ad819ec8ba380a42448e24459daf809cf4"}, + {file = "cffi-1.17.0-cp39-cp39-win32.whl", hash = "sha256:0fdacad9e0d9fc23e519efd5ea24a70348305e8d7d85ecbb1a5fa66dc834e7fb"}, + {file = "cffi-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:7cbc78dc018596315d4e7841c8c3a7ae31cc4d638c9b627f87d52e8abaaf2d29"}, + {file = "cffi-1.17.0.tar.gz", hash = "sha256:f3157624b7558b914cb039fd1af735e5e8049a87c817cc215109ad1c8779df76"}, ] [package.dependencies] @@ -3168,13 +3183,13 @@ tests = ["aiohttp", "boto3", "duckdb", "pandas (>=1.4)", "polars (>=0.19)", "pyt [[package]] name = "langfuse" -version = "2.41.0" +version = "2.42.1" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langfuse-2.41.0-py3-none-any.whl", hash = "sha256:f7db4078ff629c9c067970dac35419be199d46aeff272d903a74c5ec29e5969b"}, - {file = "langfuse-2.41.0.tar.gz", hash = "sha256:d529408079af54ddf84ac72290a756e6721357b09236cbeae728eb04c7371c6c"}, + {file = "langfuse-2.42.1-py3-none-any.whl", hash = "sha256:8895d9645aea91815db51565f90e110a76d5e157a7b12eaf1cd6959e7aaa2263"}, + {file = "langfuse-2.42.1.tar.gz", hash = "sha256:f89faf1c14308d488c90f8b7d0368fff3d259f80ffe34d169b9cfc3f0dbfab82"}, ] [package.dependencies] @@ -3359,40 +3374,40 @@ files = [ [[package]] name = "matplotlib" -version = "3.9.0" +version = "3.9.1.post1" description = "Python plotting package" optional = false python-versions = ">=3.9" files = [ - {file = "matplotlib-3.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2bcee1dffaf60fe7656183ac2190bd630842ff87b3153afb3e384d966b57fe56"}, - {file = "matplotlib-3.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3f988bafb0fa39d1074ddd5bacd958c853e11def40800c5824556eb630f94d3b"}, - {file = "matplotlib-3.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe428e191ea016bb278758c8ee82a8129c51d81d8c4bc0846c09e7e8e9057241"}, - {file = "matplotlib-3.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf3978060a106fab40c328778b148f590e27f6fa3cd15a19d6892575bce387d"}, - {file = "matplotlib-3.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:2e7f03e5cbbfacdd48c8ea394d365d91ee8f3cae7e6ec611409927b5ed997ee4"}, - {file = "matplotlib-3.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:13beb4840317d45ffd4183a778685e215939be7b08616f431c7795276e067463"}, - {file = "matplotlib-3.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:063af8587fceeac13b0936c42a2b6c732c2ab1c98d38abc3337e430e1ff75e38"}, - {file = "matplotlib-3.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9a2fa6d899e17ddca6d6526cf6e7ba677738bf2a6a9590d702c277204a7c6152"}, - {file = "matplotlib-3.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:550cdda3adbd596078cca7d13ed50b77879104e2e46392dcd7c75259d8f00e85"}, - {file = "matplotlib-3.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76cce0f31b351e3551d1f3779420cf8f6ec0d4a8cf9c0237a3b549fd28eb4abb"}, - {file = "matplotlib-3.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c53aeb514ccbbcbab55a27f912d79ea30ab21ee0531ee2c09f13800efb272674"}, - {file = "matplotlib-3.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:a5be985db2596d761cdf0c2eaf52396f26e6a64ab46bd8cd810c48972349d1be"}, - {file = "matplotlib-3.9.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:c79f3a585f1368da6049318bdf1f85568d8d04b2e89fc24b7e02cc9b62017382"}, - {file = "matplotlib-3.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bdd1ecbe268eb3e7653e04f451635f0fb0f77f07fd070242b44c076c9106da84"}, - {file = "matplotlib-3.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d38e85a1a6d732f645f1403ce5e6727fd9418cd4574521d5803d3d94911038e5"}, - {file = "matplotlib-3.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a490715b3b9984fa609116481b22178348c1a220a4499cda79132000a79b4db"}, - {file = "matplotlib-3.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8146ce83cbc5dc71c223a74a1996d446cd35cfb6a04b683e1446b7e6c73603b7"}, - {file = "matplotlib-3.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:d91a4ffc587bacf5c4ce4ecfe4bcd23a4b675e76315f2866e588686cc97fccdf"}, - {file = "matplotlib-3.9.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:616fabf4981a3b3c5a15cd95eba359c8489c4e20e03717aea42866d8d0465956"}, - {file = "matplotlib-3.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cd53c79fd02f1c1808d2cfc87dd3cf4dbc63c5244a58ee7944497107469c8d8a"}, - {file = "matplotlib-3.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06a478f0d67636554fa78558cfbcd7b9dba85b51f5c3b5a0c9be49010cf5f321"}, - {file = "matplotlib-3.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81c40af649d19c85f8073e25e5806926986806fa6d54be506fbf02aef47d5a89"}, - {file = "matplotlib-3.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52146fc3bd7813cc784562cb93a15788be0b2875c4655e2cc6ea646bfa30344b"}, - {file = "matplotlib-3.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:0fc51eaa5262553868461c083d9adadb11a6017315f3a757fc45ec6ec5f02888"}, - {file = "matplotlib-3.9.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bd4f2831168afac55b881db82a7730992aa41c4f007f1913465fb182d6fb20c0"}, - {file = "matplotlib-3.9.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:290d304e59be2b33ef5c2d768d0237f5bd132986bdcc66f80bc9bcc300066a03"}, - {file = "matplotlib-3.9.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ff2e239c26be4f24bfa45860c20ffccd118d270c5b5d081fa4ea409b5469fcd"}, - {file = "matplotlib-3.9.0-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:af4001b7cae70f7eaacfb063db605280058246de590fa7874f00f62259f2df7e"}, - {file = "matplotlib-3.9.0.tar.gz", hash = "sha256:e6d29ea6c19e34b30fb7d88b7081f869a03014f66fe06d62cc77d5a6ea88ed7a"}, + {file = "matplotlib-3.9.1.post1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3779ad3e8b72df22b8a622c5796bbcfabfa0069b835412e3c1dec8ee3de92d0c"}, + {file = "matplotlib-3.9.1.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec400340f8628e8e2260d679078d4e9b478699f386e5cc8094e80a1cb0039c7c"}, + {file = "matplotlib-3.9.1.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82c18791b8862ea095081f745b81f896b011c5a5091678fb33204fef641476af"}, + {file = "matplotlib-3.9.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:621a628389c09a6b9f609a238af8e66acecece1cfa12febc5fe4195114ba7446"}, + {file = "matplotlib-3.9.1.post1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9a54734ca761ebb27cd4f0b6c2ede696ab6861052d7d7e7b8f7a6782665115f5"}, + {file = "matplotlib-3.9.1.post1-cp310-cp310-win_amd64.whl", hash = "sha256:0721f93db92311bb514e446842e2b21c004541dcca0281afa495053e017c5458"}, + {file = "matplotlib-3.9.1.post1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b08b46058fe2a31ecb81ef6aa3611f41d871f6a8280e9057cb4016cb3d8e894a"}, + {file = "matplotlib-3.9.1.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:22b344e84fcc574f561b5731f89a7625db8ef80cdbb0026a8ea855a33e3429d1"}, + {file = "matplotlib-3.9.1.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b49fee26d64aefa9f061b575f0f7b5fc4663e51f87375c7239efa3d30d908fa"}, + {file = "matplotlib-3.9.1.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89eb7e89e2b57856533c5c98f018aa3254fa3789fcd86d5f80077b9034a54c9a"}, + {file = "matplotlib-3.9.1.post1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c06e742bade41fda6176d4c9c78c9ea016e176cd338e62a1686384cb1eb8de41"}, + {file = "matplotlib-3.9.1.post1-cp311-cp311-win_amd64.whl", hash = "sha256:c44edab5b849e0fc1f1c9d6e13eaa35ef65925f7be45be891d9784709ad95561"}, + {file = "matplotlib-3.9.1.post1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:bf28b09986aee06393e808e661c3466be9c21eff443c9bc881bce04bfbb0c500"}, + {file = "matplotlib-3.9.1.post1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:92aeb8c439d4831510d8b9d5e39f31c16c7f37873879767c26b147cef61e54cd"}, + {file = "matplotlib-3.9.1.post1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f15798b0691b45c80d3320358a88ce5a9d6f518b28575b3ea3ed31b4bd95d009"}, + {file = "matplotlib-3.9.1.post1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d59fc6096da7b9c1df275f9afc3fef5cbf634c21df9e5f844cba3dd8deb1847d"}, + {file = "matplotlib-3.9.1.post1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ab986817a32a70ce22302438691e7df4c6ee4a844d47289db9d583d873491e0b"}, + {file = "matplotlib-3.9.1.post1-cp312-cp312-win_amd64.whl", hash = "sha256:0d78e7d2d86c4472da105d39aba9b754ed3dfeaeaa4ac7206b82706e0a5362fa"}, + {file = "matplotlib-3.9.1.post1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:bd07eba6431b4dc9253cce6374a28c415e1d3a7dc9f8aba028ea7592f06fe172"}, + {file = "matplotlib-3.9.1.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ca230cc4482010d646827bd2c6d140c98c361e769ae7d954ebf6fff2a226f5b1"}, + {file = "matplotlib-3.9.1.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ace27c0fdeded399cbc43f22ffa76e0f0752358f5b33106ec7197534df08725a"}, + {file = "matplotlib-3.9.1.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a4f3aeb7ba14c497dc6f021a076c48c2e5fbdf3da1e7264a5d649683e284a2f"}, + {file = "matplotlib-3.9.1.post1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:23f96fbd4ff4cfa9b8a6b685a65e7eb3c2ced724a8d965995ec5c9c2b1f7daf5"}, + {file = "matplotlib-3.9.1.post1-cp39-cp39-win_amd64.whl", hash = "sha256:2808b95452b4ffa14bfb7c7edffc5350743c31bda495f0d63d10fdd9bc69e895"}, + {file = "matplotlib-3.9.1.post1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ffc91239f73b4179dec256b01299d46d0ffa9d27d98494bc1476a651b7821cbe"}, + {file = "matplotlib-3.9.1.post1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f965ebca9fd4feaaca45937c4849d92b70653057497181100fcd1e18161e5f29"}, + {file = "matplotlib-3.9.1.post1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:801ee9323fd7b2da0d405aebbf98d1da77ea430bbbbbec6834c0b3af15e5db44"}, + {file = "matplotlib-3.9.1.post1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:50113e9b43ceb285739f35d43db36aa752fb8154325b35d134ff6e177452f9ec"}, + {file = "matplotlib-3.9.1.post1.tar.gz", hash = "sha256:c91e585c65092c975a44dc9d4239ba8c594ba3c193d7c478b6d178c4ef61f406"}, ] [package.dependencies] @@ -5532,62 +5547,64 @@ files = [ [[package]] name = "pyyaml" -version = "6.0.1" +version = "6.0.2" description = "YAML parser and emitter for Python" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, - {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, - {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, - {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, - {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, - {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, - {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, - {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, - {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, - {file = 
"PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, - {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, - {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, - {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, - {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, - {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file 
= "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = 
"sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, + {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, + {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, + {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, + {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] [[package]] @@ -5961,114 +5978,114 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] name = "rpds-py" -version = "0.19.1" +version = "0.20.0" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.8" files = [ - {file = "rpds_py-0.19.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:aaf71f95b21f9dc708123335df22e5a2fef6307e3e6f9ed773b2e0938cc4d491"}, - {file = "rpds_py-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ca0dda0c5715efe2ab35bb83f813f681ebcd2840d8b1b92bfc6fe3ab382fae4a"}, - 
{file = "rpds_py-0.19.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81db2e7282cc0487f500d4db203edc57da81acde9e35f061d69ed983228ffe3b"}, - {file = "rpds_py-0.19.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1a8dfa125b60ec00c7c9baef945bb04abf8ac772d8ebefd79dae2a5f316d7850"}, - {file = "rpds_py-0.19.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:271accf41b02687cef26367c775ab220372ee0f4925591c6796e7c148c50cab5"}, - {file = "rpds_py-0.19.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9bc4161bd3b970cd6a6fcda70583ad4afd10f2750609fb1f3ca9505050d4ef3"}, - {file = "rpds_py-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0cf2a0dbb5987da4bd92a7ca727eadb225581dd9681365beba9accbe5308f7d"}, - {file = "rpds_py-0.19.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b5e28e56143750808c1c79c70a16519e9bc0a68b623197b96292b21b62d6055c"}, - {file = "rpds_py-0.19.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c7af6f7b80f687b33a4cdb0a785a5d4de1fb027a44c9a049d8eb67d5bfe8a687"}, - {file = "rpds_py-0.19.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e429fc517a1c5e2a70d576077231538a98d59a45dfc552d1ac45a132844e6dfb"}, - {file = "rpds_py-0.19.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d2dbd8f4990d4788cb122f63bf000357533f34860d269c1a8e90ae362090ff3a"}, - {file = "rpds_py-0.19.1-cp310-none-win32.whl", hash = "sha256:e0f9d268b19e8f61bf42a1da48276bcd05f7ab5560311f541d22557f8227b866"}, - {file = "rpds_py-0.19.1-cp310-none-win_amd64.whl", hash = "sha256:df7c841813f6265e636fe548a49664c77af31ddfa0085515326342a751a6ba51"}, - {file = "rpds_py-0.19.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:902cf4739458852fe917104365ec0efbea7d29a15e4276c96a8d33e6ed8ec137"}, - {file = "rpds_py-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f3d73022990ab0c8b172cce57c69fd9a89c24fd473a5e79cbce92df87e3d9c48"}, - {file = "rpds_py-0.19.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3837c63dd6918a24de6c526277910e3766d8c2b1627c500b155f3eecad8fad65"}, - {file = "rpds_py-0.19.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cdb7eb3cf3deb3dd9e7b8749323b5d970052711f9e1e9f36364163627f96da58"}, - {file = "rpds_py-0.19.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26ab43b6d65d25b1a333c8d1b1c2f8399385ff683a35ab5e274ba7b8bb7dc61c"}, - {file = "rpds_py-0.19.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75130df05aae7a7ac171b3b5b24714cffeabd054ad2ebc18870b3aa4526eba23"}, - {file = "rpds_py-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c34f751bf67cab69638564eee34023909380ba3e0d8ee7f6fe473079bf93f09b"}, - {file = "rpds_py-0.19.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f2671cb47e50a97f419a02cd1e0c339b31de017b033186358db92f4d8e2e17d8"}, - {file = "rpds_py-0.19.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3c73254c256081704dba0a333457e2fb815364018788f9b501efe7c5e0ada401"}, - {file = "rpds_py-0.19.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4383beb4a29935b8fa28aca8fa84c956bf545cb0c46307b091b8d312a9150e6a"}, - {file = "rpds_py-0.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dbceedcf4a9329cc665452db1aaf0845b85c666e4885b92ee0cddb1dbf7e052a"}, - {file = "rpds_py-0.19.1-cp311-none-win32.whl", hash = 
"sha256:f0a6d4a93d2a05daec7cb885157c97bbb0be4da739d6f9dfb02e101eb40921cd"}, - {file = "rpds_py-0.19.1-cp311-none-win_amd64.whl", hash = "sha256:c149a652aeac4902ecff2dd93c3b2681c608bd5208c793c4a99404b3e1afc87c"}, - {file = "rpds_py-0.19.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:56313be667a837ff1ea3508cebb1ef6681d418fa2913a0635386cf29cff35165"}, - {file = "rpds_py-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6d1d7539043b2b31307f2c6c72957a97c839a88b2629a348ebabe5aa8b626d6b"}, - {file = "rpds_py-0.19.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e1dc59a5e7bc7f44bd0c048681f5e05356e479c50be4f2c1a7089103f1621d5"}, - {file = "rpds_py-0.19.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b8f78398e67a7227aefa95f876481485403eb974b29e9dc38b307bb6eb2315ea"}, - {file = "rpds_py-0.19.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ef07a0a1d254eeb16455d839cef6e8c2ed127f47f014bbda64a58b5482b6c836"}, - {file = "rpds_py-0.19.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8124101e92c56827bebef084ff106e8ea11c743256149a95b9fd860d3a4f331f"}, - {file = "rpds_py-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08ce9c95a0b093b7aec75676b356a27879901488abc27e9d029273d280438505"}, - {file = "rpds_py-0.19.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b02dd77a2de6e49078c8937aadabe933ceac04b41c5dde5eca13a69f3cf144e"}, - {file = "rpds_py-0.19.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4dd02e29c8cbed21a1875330b07246b71121a1c08e29f0ee3db5b4cfe16980c4"}, - {file = "rpds_py-0.19.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9c7042488165f7251dc7894cd533a875d2875af6d3b0e09eda9c4b334627ad1c"}, - {file = "rpds_py-0.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f809a17cc78bd331e137caa25262b507225854073fd319e987bd216bed911b7c"}, - {file = "rpds_py-0.19.1-cp312-none-win32.whl", hash = "sha256:3ddab996807c6b4227967fe1587febade4e48ac47bb0e2d3e7858bc621b1cace"}, - {file = "rpds_py-0.19.1-cp312-none-win_amd64.whl", hash = "sha256:32e0db3d6e4f45601b58e4ac75c6f24afbf99818c647cc2066f3e4b192dabb1f"}, - {file = "rpds_py-0.19.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:747251e428406b05fc86fee3904ee19550c4d2d19258cef274e2151f31ae9d38"}, - {file = "rpds_py-0.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:dc733d35f861f8d78abfaf54035461e10423422999b360966bf1c443cbc42705"}, - {file = "rpds_py-0.19.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbda75f245caecff8faa7e32ee94dfaa8312a3367397975527f29654cd17a6ed"}, - {file = "rpds_py-0.19.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd04d8cab16cab5b0a9ffc7d10f0779cf1120ab16c3925404428f74a0a43205a"}, - {file = "rpds_py-0.19.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2d66eb41ffca6cc3c91d8387509d27ba73ad28371ef90255c50cb51f8953301"}, - {file = "rpds_py-0.19.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdf4890cda3b59170009d012fca3294c00140e7f2abe1910e6a730809d0f3f9b"}, - {file = "rpds_py-0.19.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1fa67ef839bad3815124f5f57e48cd50ff392f4911a9f3cf449d66fa3df62a5"}, - {file = "rpds_py-0.19.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b82c9514c6d74b89a370c4060bdb80d2299bc6857e462e4a215b4ef7aa7b090e"}, - {file = 
"rpds_py-0.19.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c7b07959866a6afb019abb9564d8a55046feb7a84506c74a6f197cbcdf8a208e"}, - {file = "rpds_py-0.19.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4f580ae79d0b861dfd912494ab9d477bea535bfb4756a2269130b6607a21802e"}, - {file = "rpds_py-0.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c6d20c8896c00775e6f62d8373aba32956aa0b850d02b5ec493f486c88e12859"}, - {file = "rpds_py-0.19.1-cp313-none-win32.whl", hash = "sha256:afedc35fe4b9e30ab240b208bb9dc8938cb4afe9187589e8d8d085e1aacb8309"}, - {file = "rpds_py-0.19.1-cp313-none-win_amd64.whl", hash = "sha256:1d4af2eb520d759f48f1073ad3caef997d1bfd910dc34e41261a595d3f038a94"}, - {file = "rpds_py-0.19.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:34bca66e2e3eabc8a19e9afe0d3e77789733c702c7c43cd008e953d5d1463fde"}, - {file = "rpds_py-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:24f8ae92c7fae7c28d0fae9b52829235df83f34847aa8160a47eb229d9666c7b"}, - {file = "rpds_py-0.19.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71157f9db7f6bc6599a852852f3389343bea34315b4e6f109e5cbc97c1fb2963"}, - {file = "rpds_py-0.19.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1d494887d40dc4dd0d5a71e9d07324e5c09c4383d93942d391727e7a40ff810b"}, - {file = "rpds_py-0.19.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7b3661e6d4ba63a094138032c1356d557de5b3ea6fd3cca62a195f623e381c76"}, - {file = "rpds_py-0.19.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97fbb77eaeb97591efdc654b8b5f3ccc066406ccfb3175b41382f221ecc216e8"}, - {file = "rpds_py-0.19.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cc4bc73e53af8e7a42c8fd7923bbe35babacfa7394ae9240b3430b5dcf16b2a"}, - {file = "rpds_py-0.19.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:35af5e4d5448fa179fd7fff0bba0fba51f876cd55212f96c8bbcecc5c684ae5c"}, - {file = "rpds_py-0.19.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3511f6baf8438326e351097cecd137eb45c5f019944fe0fd0ae2fea2fd26be39"}, - {file = "rpds_py-0.19.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:57863d16187995c10fe9cf911b897ed443ac68189179541734502353af33e693"}, - {file = "rpds_py-0.19.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:9e318e6786b1e750a62f90c6f7fa8b542102bdcf97c7c4de2a48b50b61bd36ec"}, - {file = "rpds_py-0.19.1-cp38-none-win32.whl", hash = "sha256:53dbc35808c6faa2ce3e48571f8f74ef70802218554884787b86a30947842a14"}, - {file = "rpds_py-0.19.1-cp38-none-win_amd64.whl", hash = "sha256:8df1c283e57c9cb4d271fdc1875f4a58a143a2d1698eb0d6b7c0d7d5f49c53a1"}, - {file = "rpds_py-0.19.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e76c902d229a3aa9d5ceb813e1cbcc69bf5bda44c80d574ff1ac1fa3136dea71"}, - {file = "rpds_py-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:de1f7cd5b6b351e1afd7568bdab94934d656abe273d66cda0ceea43bbc02a0c2"}, - {file = "rpds_py-0.19.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24fc5a84777cb61692d17988989690d6f34f7f95968ac81398d67c0d0994a897"}, - {file = "rpds_py-0.19.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:74129d5ffc4cde992d89d345f7f7d6758320e5d44a369d74d83493429dad2de5"}, - {file = "rpds_py-0.19.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e360188b72f8080fefa3adfdcf3618604cc8173651c9754f189fece068d2a45"}, - {file = 
"rpds_py-0.19.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13e6d4840897d4e4e6b2aa1443e3a8eca92b0402182aafc5f4ca1f5e24f9270a"}, - {file = "rpds_py-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f09529d2332264a902688031a83c19de8fda5eb5881e44233286b9c9ec91856d"}, - {file = "rpds_py-0.19.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0d4b52811dcbc1aba08fd88d475f75b4f6db0984ba12275d9bed1a04b2cae9b5"}, - {file = "rpds_py-0.19.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dd635c2c4043222d80d80ca1ac4530a633102a9f2ad12252183bcf338c1b9474"}, - {file = "rpds_py-0.19.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f35b34a5184d5e0cc360b61664c1c06e866aab077b5a7c538a3e20c8fcdbf90b"}, - {file = "rpds_py-0.19.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d4ec0046facab83012d821b33cead742a35b54575c4edfb7ed7445f63441835f"}, - {file = "rpds_py-0.19.1-cp39-none-win32.whl", hash = "sha256:f5b8353ea1a4d7dfb59a7f45c04df66ecfd363bb5b35f33b11ea579111d4655f"}, - {file = "rpds_py-0.19.1-cp39-none-win_amd64.whl", hash = "sha256:1fb93d3486f793d54a094e2bfd9cd97031f63fcb5bc18faeb3dd4b49a1c06523"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7d5c7e32f3ee42f77d8ff1a10384b5cdcc2d37035e2e3320ded909aa192d32c3"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:89cc8921a4a5028d6dd388c399fcd2eef232e7040345af3d5b16c04b91cf3c7e"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bca34e913d27401bda2a6f390d0614049f5a95b3b11cd8eff80fe4ec340a1208"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5953391af1405f968eb5701ebbb577ebc5ced8d0041406f9052638bafe52209d"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:840e18c38098221ea6201f091fc5d4de6128961d2930fbbc96806fb43f69aec1"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6d8b735c4d162dc7d86a9cf3d717f14b6c73637a1f9cd57fe7e61002d9cb1972"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce757c7c90d35719b38fa3d4ca55654a76a40716ee299b0865f2de21c146801c"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a9421b23c85f361a133aa7c5e8ec757668f70343f4ed8fdb5a4a14abd5437244"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:3b823be829407393d84ee56dc849dbe3b31b6a326f388e171555b262e8456cc1"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:5e58b61dcbb483a442c6239c3836696b79f2cd8e7eec11e12155d3f6f2d886d1"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:39d67896f7235b2c886fb1ee77b1491b77049dcef6fbf0f401e7b4cbed86bbd4"}, - {file = "rpds_py-0.19.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8b32cd4ab6db50c875001ba4f5a6b30c0f42151aa1fbf9c2e7e3674893fb1dc4"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1c32e41de995f39b6b315d66c27dea3ef7f7c937c06caab4c6a79a5e09e2c415"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1a129c02b42d46758c87faeea21a9f574e1c858b9f358b6dd0bbd71d17713175"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:346557f5b1d8fd9966059b7a748fd79ac59f5752cd0e9498d6a40e3ac1c1875f"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:31e450840f2f27699d014cfc8865cc747184286b26d945bcea6042bb6aa4d26e"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01227f8b3e6c8961490d869aa65c99653df80d2f0a7fde8c64ebddab2b9b02fd"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69084fd29bfeff14816666c93a466e85414fe6b7d236cfc108a9c11afa6f7301"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4d2b88efe65544a7d5121b0c3b003ebba92bfede2ea3577ce548b69c5235185"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ea961a674172ed2235d990d7edf85d15d8dfa23ab8575e48306371c070cda67"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:5beffdbe766cfe4fb04f30644d822a1080b5359df7db3a63d30fa928375b2720"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:720f3108fb1bfa32e51db58b832898372eb5891e8472a8093008010911e324c5"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:c2087dbb76a87ec2c619253e021e4fb20d1a72580feeaa6892b0b3d955175a71"}, - {file = "rpds_py-0.19.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2ddd50f18ebc05ec29a0d9271e9dbe93997536da3546677f8ca00b76d477680c"}, - {file = "rpds_py-0.19.1.tar.gz", hash = "sha256:31dd5794837f00b46f4096aa8ccaa5972f73a938982e32ed817bb520c465e520"}, + {file = "rpds_py-0.20.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3ad0fda1635f8439cde85c700f964b23ed5fc2d28016b32b9ee5fe30da5c84e2"}, + {file = "rpds_py-0.20.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9bb4a0d90fdb03437c109a17eade42dfbf6190408f29b2744114d11586611d6f"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6377e647bbfd0a0b159fe557f2c6c602c159fc752fa316572f012fc0bf67150"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb851b7df9dda52dc1415ebee12362047ce771fc36914586b2e9fcbd7d293b3e"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e0f80b739e5a8f54837be5d5c924483996b603d5502bfff79bf33da06164ee2"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a8c94dad2e45324fc74dce25e1645d4d14df9a4e54a30fa0ae8bad9a63928e3"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8e604fe73ba048c06085beaf51147eaec7df856824bfe7b98657cf436623daf"}, + {file = "rpds_py-0.20.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:df3de6b7726b52966edf29663e57306b23ef775faf0ac01a3e9f4012a24a4140"}, + {file = "rpds_py-0.20.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cf258ede5bc22a45c8e726b29835b9303c285ab46fc7c3a4cc770736b5304c9f"}, + {file = "rpds_py-0.20.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:55fea87029cded5df854ca7e192ec7bdb7ecd1d9a3f63d5c4eb09148acf4a7ce"}, + {file = "rpds_py-0.20.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ae94bd0b2f02c28e199e9bc51485d0c5601f58780636185660f86bf80c89af94"}, + {file = "rpds_py-0.20.0-cp310-none-win32.whl", hash = "sha256:28527c685f237c05445efec62426d285e47a58fb05ba0090a4340b73ecda6dee"}, + 
{file = "rpds_py-0.20.0-cp310-none-win_amd64.whl", hash = "sha256:238a2d5b1cad28cdc6ed15faf93a998336eb041c4e440dd7f902528b8891b399"}, + {file = "rpds_py-0.20.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ac2f4f7a98934c2ed6505aead07b979e6f999389f16b714448fb39bbaa86a489"}, + {file = "rpds_py-0.20.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:220002c1b846db9afd83371d08d239fdc865e8f8c5795bbaec20916a76db3318"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d7919548df3f25374a1f5d01fbcd38dacab338ef5f33e044744b5c36729c8db"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:758406267907b3781beee0f0edfe4a179fbd97c0be2e9b1154d7f0a1279cf8e5"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3d61339e9f84a3f0767b1995adfb171a0d00a1185192718a17af6e124728e0f5"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1259c7b3705ac0a0bd38197565a5d603218591d3f6cee6e614e380b6ba61c6f6"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c1dc0f53856b9cc9a0ccca0a7cc61d3d20a7088201c0937f3f4048c1718a209"}, + {file = "rpds_py-0.20.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7e60cb630f674a31f0368ed32b2a6b4331b8350d67de53c0359992444b116dd3"}, + {file = "rpds_py-0.20.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dbe982f38565bb50cb7fb061ebf762c2f254ca3d8c20d4006878766e84266272"}, + {file = "rpds_py-0.20.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:514b3293b64187172bc77c8fb0cdae26981618021053b30d8371c3a902d4d5ad"}, + {file = "rpds_py-0.20.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d0a26ffe9d4dd35e4dfdd1e71f46401cff0181c75ac174711ccff0459135fa58"}, + {file = "rpds_py-0.20.0-cp311-none-win32.whl", hash = "sha256:89c19a494bf3ad08c1da49445cc5d13d8fefc265f48ee7e7556839acdacf69d0"}, + {file = "rpds_py-0.20.0-cp311-none-win_amd64.whl", hash = "sha256:c638144ce971df84650d3ed0096e2ae7af8e62ecbbb7b201c8935c370df00a2c"}, + {file = "rpds_py-0.20.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a84ab91cbe7aab97f7446652d0ed37d35b68a465aeef8fc41932a9d7eee2c1a6"}, + {file = "rpds_py-0.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:56e27147a5a4c2c21633ff8475d185734c0e4befd1c989b5b95a5d0db699b21b"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2580b0c34583b85efec8c5c5ec9edf2dfe817330cc882ee972ae650e7b5ef739"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b80d4a7900cf6b66bb9cee5c352b2d708e29e5a37fe9bf784fa97fc11504bf6c"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50eccbf054e62a7b2209b28dc7a22d6254860209d6753e6b78cfaeb0075d7bee"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:49a8063ea4296b3a7e81a5dfb8f7b2d73f0b1c20c2af401fb0cdf22e14711a96"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea438162a9fcbee3ecf36c23e6c68237479f89f962f82dae83dc15feeceb37e4"}, + {file = "rpds_py-0.20.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:18d7585c463087bddcfa74c2ba267339f14f2515158ac4db30b1f9cbdb62c8ef"}, + {file = "rpds_py-0.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:d4c7d1a051eeb39f5c9547e82ea27cbcc28338482242e3e0b7768033cb083821"}, + {file = "rpds_py-0.20.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e4df1e3b3bec320790f699890d41c59d250f6beda159ea3c44c3f5bac1976940"}, + {file = "rpds_py-0.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2cf126d33a91ee6eedc7f3197b53e87a2acdac63602c0f03a02dd69e4b138174"}, + {file = "rpds_py-0.20.0-cp312-none-win32.whl", hash = "sha256:8bc7690f7caee50b04a79bf017a8d020c1f48c2a1077ffe172abec59870f1139"}, + {file = "rpds_py-0.20.0-cp312-none-win_amd64.whl", hash = "sha256:0e13e6952ef264c40587d510ad676a988df19adea20444c2b295e536457bc585"}, + {file = "rpds_py-0.20.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:aa9a0521aeca7d4941499a73ad7d4f8ffa3d1affc50b9ea11d992cd7eff18a29"}, + {file = "rpds_py-0.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a1f1d51eccb7e6c32ae89243cb352389228ea62f89cd80823ea7dd1b98e0b91"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a86a9b96070674fc88b6f9f71a97d2c1d3e5165574615d1f9168ecba4cecb24"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6c8ef2ebf76df43f5750b46851ed1cdf8f109d7787ca40035fe19fbdc1acc5a7"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b74b25f024b421d5859d156750ea9a65651793d51b76a2e9238c05c9d5f203a9"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57eb94a8c16ab08fef6404301c38318e2c5a32216bf5de453e2714c964c125c8"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1940dae14e715e2e02dfd5b0f64a52e8374a517a1e531ad9412319dc3ac7879"}, + {file = "rpds_py-0.20.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d20277fd62e1b992a50c43f13fbe13277a31f8c9f70d59759c88f644d66c619f"}, + {file = "rpds_py-0.20.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:06db23d43f26478303e954c34c75182356ca9aa7797d22c5345b16871ab9c45c"}, + {file = "rpds_py-0.20.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b2a5db5397d82fa847e4c624b0c98fe59d2d9b7cf0ce6de09e4d2e80f8f5b3f2"}, + {file = "rpds_py-0.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5a35df9f5548fd79cb2f52d27182108c3e6641a4feb0f39067911bf2adaa3e57"}, + {file = "rpds_py-0.20.0-cp313-none-win32.whl", hash = "sha256:fd2d84f40633bc475ef2d5490b9c19543fbf18596dcb1b291e3a12ea5d722f7a"}, + {file = "rpds_py-0.20.0-cp313-none-win_amd64.whl", hash = "sha256:9bc2d153989e3216b0559251b0c260cfd168ec78b1fac33dd485750a228db5a2"}, + {file = "rpds_py-0.20.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:f2fbf7db2012d4876fb0d66b5b9ba6591197b0f165db8d99371d976546472a24"}, + {file = "rpds_py-0.20.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1e5f3cd7397c8f86c8cc72d5a791071431c108edd79872cdd96e00abd8497d29"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce9845054c13696f7af7f2b353e6b4f676dab1b4b215d7fe5e05c6f8bb06f965"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c3e130fd0ec56cb76eb49ef52faead8ff09d13f4527e9b0c400307ff72b408e1"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b16aa0107ecb512b568244ef461f27697164d9a68d8b35090e9b0c1c8b27752"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:aa7f429242aae2947246587d2964fad750b79e8c233a2367f71b554e9447949c"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af0fc424a5842a11e28956e69395fbbeab2c97c42253169d87e90aac2886d751"}, + {file = "rpds_py-0.20.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b8c00a3b1e70c1d3891f0db1b05292747f0dbcfb49c43f9244d04c70fbc40eb8"}, + {file = "rpds_py-0.20.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:40ce74fc86ee4645d0a225498d091d8bc61f39b709ebef8204cb8b5a464d3c0e"}, + {file = "rpds_py-0.20.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4fe84294c7019456e56d93e8ababdad5a329cd25975be749c3f5f558abb48253"}, + {file = "rpds_py-0.20.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:338ca4539aad4ce70a656e5187a3a31c5204f261aef9f6ab50e50bcdffaf050a"}, + {file = "rpds_py-0.20.0-cp38-none-win32.whl", hash = "sha256:54b43a2b07db18314669092bb2de584524d1ef414588780261e31e85846c26a5"}, + {file = "rpds_py-0.20.0-cp38-none-win_amd64.whl", hash = "sha256:a1862d2d7ce1674cffa6d186d53ca95c6e17ed2b06b3f4c476173565c862d232"}, + {file = "rpds_py-0.20.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:3fde368e9140312b6e8b6c09fb9f8c8c2f00999d1823403ae90cc00480221b22"}, + {file = "rpds_py-0.20.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9824fb430c9cf9af743cf7aaf6707bf14323fb51ee74425c380f4c846ea70789"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11ef6ce74616342888b69878d45e9f779b95d4bd48b382a229fe624a409b72c5"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c52d3f2f82b763a24ef52f5d24358553e8403ce05f893b5347098014f2d9eff2"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d35cef91e59ebbeaa45214861874bc6f19eb35de96db73e467a8358d701a96c"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d72278a30111e5b5525c1dd96120d9e958464316f55adb030433ea905866f4de"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4c29cbbba378759ac5786730d1c3cb4ec6f8ababf5c42a9ce303dc4b3d08cda"}, + {file = "rpds_py-0.20.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6632f2d04f15d1bd6fe0eedd3b86d9061b836ddca4c03d5cf5c7e9e6b7c14580"}, + {file = "rpds_py-0.20.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d0b67d87bb45ed1cd020e8fbf2307d449b68abc45402fe1a4ac9e46c3c8b192b"}, + {file = "rpds_py-0.20.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ec31a99ca63bf3cd7f1a5ac9fe95c5e2d060d3c768a09bc1d16e235840861420"}, + {file = "rpds_py-0.20.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22e6c9976e38f4d8c4a63bd8a8edac5307dffd3ee7e6026d97f3cc3a2dc02a0b"}, + {file = "rpds_py-0.20.0-cp39-none-win32.whl", hash = "sha256:569b3ea770c2717b730b61998b6c54996adee3cef69fc28d444f3e7920313cf7"}, + {file = "rpds_py-0.20.0-cp39-none-win_amd64.whl", hash = "sha256:e6900ecdd50ce0facf703f7a00df12374b74bbc8ad9fe0f6559947fb20f82364"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:617c7357272c67696fd052811e352ac54ed1d9b49ab370261a80d3b6ce385045"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9426133526f69fcaba6e42146b4e12d6bc6c839b8b555097020e2b78ce908dcc"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:deb62214c42a261cb3eb04d474f7155279c1a8a8c30ac89b7dcb1721d92c3c02"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fcaeb7b57f1a1e071ebd748984359fef83ecb026325b9d4ca847c95bc7311c92"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d454b8749b4bd70dd0a79f428731ee263fa6995f83ccb8bada706e8d1d3ff89d"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d807dc2051abe041b6649681dce568f8e10668e3c1c6543ebae58f2d7e617855"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3c20f0ddeb6e29126d45f89206b8291352b8c5b44384e78a6499d68b52ae511"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b7f19250ceef892adf27f0399b9e5afad019288e9be756d6919cb58892129f51"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:4f1ed4749a08379555cebf4650453f14452eaa9c43d0a95c49db50c18b7da075"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:dcedf0b42bcb4cfff4101d7771a10532415a6106062f005ab97d1d0ab5681c60"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:39ed0d010457a78f54090fafb5d108501b5aa5604cc22408fc1c0c77eac14344"}, + {file = "rpds_py-0.20.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bb273176be34a746bdac0b0d7e4e2c467323d13640b736c4c477881a3220a989"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f918a1a130a6dfe1d7fe0f105064141342e7dd1611f2e6a21cd2f5c8cb1cfb3e"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f60012a73aa396be721558caa3a6fd49b3dd0033d1675c6d59c4502e870fcf0c"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d2b1ad682a3dfda2a4e8ad8572f3100f95fad98cb99faf37ff0ddfe9cbf9d03"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:614fdafe9f5f19c63ea02817fa4861c606a59a604a77c8cdef5aa01d28b97921"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fa518bcd7600c584bf42e6617ee8132869e877db2f76bcdc281ec6a4113a53ab"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0475242f447cc6cb8a9dd486d68b2ef7fbee84427124c232bff5f63b1fe11e5"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f90a4cd061914a60bd51c68bcb4357086991bd0bb93d8aa66a6da7701370708f"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:def7400461c3a3f26e49078302e1c1b38f6752342c77e3cf72ce91ca69fb1bc1"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:65794e4048ee837494aea3c21a28ad5fc080994dfba5b036cf84de37f7ad5074"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:faefcc78f53a88f3076b7f8be0a8f8d35133a3ecf7f3770895c25f8813460f08"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:5b4f105deeffa28bbcdff6c49b34e74903139afa690e35d2d9e3c2c2fba18cec"}, + {file = "rpds_py-0.20.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fdfc3a892927458d98f3d55428ae46b921d1f7543b89382fdb483f5640daaec8"}, + {file = "rpds_py-0.20.0.tar.gz", hash = 
"sha256:d72a210824facfdaf8768cf2d7ca25a042c30320b3020de2fa04640920d4e121"}, ] [[package]] @@ -6612,14 +6629,6 @@ files = [ {file = "SQLAlchemy-2.0.21-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b69f1f754d92eb1cc6b50938359dead36b96a1dcf11a8670bff65fd9b21a4b09"}, {file = "SQLAlchemy-2.0.21-cp311-cp311-win32.whl", hash = "sha256:af520a730d523eab77d754f5cf44cc7dd7ad2d54907adeb3233177eeb22f271b"}, {file = "SQLAlchemy-2.0.21-cp311-cp311-win_amd64.whl", hash = "sha256:141675dae56522126986fa4ca713739d00ed3a6f08f3c2eb92c39c6dfec463ce"}, - {file = "SQLAlchemy-2.0.21-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:56628ca27aa17b5890391ded4e385bf0480209726f198799b7e980c6bd473bd7"}, - {file = "SQLAlchemy-2.0.21-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db726be58837fe5ac39859e0fa40baafe54c6d54c02aba1d47d25536170b690f"}, - {file = "SQLAlchemy-2.0.21-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7421c1bfdbb7214313919472307be650bd45c4dc2fcb317d64d078993de045b"}, - {file = "SQLAlchemy-2.0.21-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:632784f7a6f12cfa0e84bf2a5003b07660addccf5563c132cd23b7cc1d7371a9"}, - {file = "SQLAlchemy-2.0.21-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f6f7276cf26145a888f2182a98f204541b519d9ea358a65d82095d9c9e22f917"}, - {file = "SQLAlchemy-2.0.21-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2a1f7ffac934bc0ea717fa1596f938483fb8c402233f9b26679b4f7b38d6ab6e"}, - {file = "SQLAlchemy-2.0.21-cp312-cp312-win32.whl", hash = "sha256:bfece2f7cec502ec5f759bbc09ce711445372deeac3628f6fa1c16b7fb45b682"}, - {file = "SQLAlchemy-2.0.21-cp312-cp312-win_amd64.whl", hash = "sha256:526b869a0f4f000d8d8ee3409d0becca30ae73f494cbb48801da0129601f72c6"}, {file = "SQLAlchemy-2.0.21-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7614f1eab4336df7dd6bee05bc974f2b02c38d3d0c78060c5faa4cd1ca2af3b8"}, {file = "SQLAlchemy-2.0.21-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d59cb9e20d79686aa473e0302e4a82882d7118744d30bb1dfb62d3c47141b3ec"}, {file = "SQLAlchemy-2.0.21-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a95aa0672e3065d43c8aa80080cdd5cc40fe92dc873749e6c1cf23914c4b83af"}, From 557014e06b7deced340f5118af79ccd774df3d67 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 7 Aug 2024 13:29:53 +0200 Subject: [PATCH 06/17] Task updates and updates to SQLAlchemy Adapter --- cognee/api/v1/cognify/cognify_v2.py | 10 +++++----- cognee/tasks/__init__.py | 9 +++++++++ cognee/tasks/chunk_extract_summary/__init__.py | 0 cognee/tasks/chunk_naive_llm_classifier/__init__.py | 0 cognee/tasks/chunk_remove_disconnected/__init__.py | 0 cognee/tasks/chunk_to_graph_decomposition/__init__.py | 0 cognee/tasks/chunk_update_check/__init__.py | 0 cognee/tasks/chunks_into_graph/__init__.py | 0 .../chunks_into_graph.py} | 2 +- .../save_chunks_to_store.py} | 2 +- cognee/tasks/source_documents_to_chunks/__init__.py | 0 pyproject.toml | 4 ++++ 12 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 cognee/tasks/__init__.py create mode 100644 cognee/tasks/chunk_extract_summary/__init__.py create mode 100644 cognee/tasks/chunk_naive_llm_classifier/__init__.py create mode 100644 cognee/tasks/chunk_remove_disconnected/__init__.py create mode 100644 cognee/tasks/chunk_to_graph_decomposition/__init__.py create mode 100644 cognee/tasks/chunk_update_check/__init__.py create mode 100644 
cognee/tasks/chunks_into_graph/__init__.py rename cognee/tasks/{graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py => chunks_into_graph/chunks_into_graph.py} (98%) rename cognee/tasks/{chunk_to_vector_graphstore/chunk_to_vector_graphstore.py => save_chunks_to_store/save_chunks_to_store.py} (97%) create mode 100644 cognee/tasks/source_documents_to_chunks/__init__.py diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 95e56108..b2a03ebf 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -31,10 +31,10 @@ from cognee.tasks.chunk_naive_llm_classifier.chunk_naive_llm_classifier import chunk_naive_llm_classifier_task from cognee.tasks.chunk_remove_disconnected.chunk_remove_disconnected import chunk_remove_disconnected_task from cognee.tasks.chunk_to_graph_decomposition.chunk_to_graph_decomposition import chunk_to_graph_decomposition_task -from cognee.tasks.chunk_to_vector_graphstore.chunk_to_vector_graphstore import chunk_to_vector_graphstore_task +from cognee.tasks.save_chunks_to_store.save_chunks_to_store import save_chunks_to_store_task from cognee.tasks.chunk_update_check.chunk_update_check import chunk_update_check_task -from cognee.tasks.graph_decomposition_to_graph_nodes.graph_decomposition_to_graph_nodes import \ - graph_decomposition_to_graph_nodes_task +from cognee.tasks.chunks_into_graph.chunks_into_graph import \ + chunks_into_graph_task from cognee.tasks.source_documents_to_chunks.source_documents_to_chunks import source_documents_to_chunks logger = logging.getLogger("cognify.v2") @@ -111,10 +111,10 @@ async def run_cognify_pipeline(dataset: Dataset): tasks = [ Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type Task(chunk_to_graph_decomposition_task, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data - Task(graph_decomposition_to_graph_nodes_task, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes + Task(chunks_into_graph_task, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes Task(chunk_update_check_task, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks Task( - chunk_to_vector_graphstore_task, + save_chunks_to_store_task, collection_name = "chunks", ), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other) run_tasks_parallel([ diff --git a/cognee/tasks/__init__.py b/cognee/tasks/__init__.py new file mode 100644 index 00000000..4029b0c4 --- /dev/null +++ b/cognee/tasks/__init__.py @@ -0,0 +1,9 @@ +from . chunk_extract_summary import chunk_extract_summary +from . chunk_naive_llm_classifier import chunk_naive_llm_classifier +from . chunk_remove_disconnected import chunk_remove_disconnected +from . chunk_to_graph_decomposition import chunk_to_graph_decomposition +from . save_chunks_to_store import save_chunks_to_store +from . chunk_update_check import chunk_update_check +from . chunks_into_graph import chunks_into_graph +from . 
source_documents_to_chunks import source_documents_to_chunks + diff --git a/cognee/tasks/chunk_extract_summary/__init__.py b/cognee/tasks/chunk_extract_summary/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cognee/tasks/chunk_naive_llm_classifier/__init__.py b/cognee/tasks/chunk_naive_llm_classifier/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cognee/tasks/chunk_remove_disconnected/__init__.py b/cognee/tasks/chunk_remove_disconnected/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cognee/tasks/chunk_to_graph_decomposition/__init__.py b/cognee/tasks/chunk_to_graph_decomposition/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cognee/tasks/chunk_update_check/__init__.py b/cognee/tasks/chunk_update_check/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cognee/tasks/chunks_into_graph/__init__.py b/cognee/tasks/chunks_into_graph/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cognee/tasks/graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py b/cognee/tasks/chunks_into_graph/chunks_into_graph.py similarity index 98% rename from cognee/tasks/graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py rename to cognee/tasks/chunks_into_graph/chunks_into_graph.py index 2c7539e3..40747ffb 100644 --- a/cognee/tasks/graph_decomposition_to_graph_nodes/graph_decomposition_to_graph_nodes.py +++ b/cognee/tasks/chunks_into_graph/chunks_into_graph.py @@ -18,7 +18,7 @@ class EntityNode(BaseModel): created_at: datetime updated_at: datetime -async def graph_decomposition_to_graph_nodes_task(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel], collection_name: str): +async def chunks_into_graph_task(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel], collection_name: str): chunk_graphs = await asyncio.gather( *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks] ) diff --git a/cognee/tasks/chunk_to_vector_graphstore/chunk_to_vector_graphstore.py b/cognee/tasks/save_chunks_to_store/save_chunks_to_store.py similarity index 97% rename from cognee/tasks/chunk_to_vector_graphstore/chunk_to_vector_graphstore.py rename to cognee/tasks/save_chunks_to_store/save_chunks_to_store.py index aceb879f..6ae4fb89 100644 --- a/cognee/tasks/chunk_to_vector_graphstore/chunk_to_vector_graphstore.py +++ b/cognee/tasks/save_chunks_to_store/save_chunks_to_store.py @@ -2,7 +2,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk -async def chunk_to_vector_graphstore_task(data_chunks: list[DocumentChunk], collection_name: str): +async def save_chunks_to_store_task(data_chunks: list[DocumentChunk], collection_name: str): if len(data_chunks) == 0: return data_chunks diff --git a/cognee/tasks/source_documents_to_chunks/__init__.py b/cognee/tasks/source_documents_to_chunks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pyproject.toml b/pyproject.toml index 489348fa..53c5681b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,10 @@ weaviate = ["weaviate-client"] qdrant = ["qdrant-client"] neo4j = ["neo4j", "py2neo"] notebook = ["ipykernel","overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"] +langchain = ["langfuse"] + + +# Usage note: install this optional extra with `poetry install --extras langchain` (a bare shell command here would make the TOML invalid) [tool.poetry.group.dev.dependencies] pytest = "^7.4.0" From 
2e367198cd32acaad1691cc108d6424db97ed673 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 7 Aug 2024 18:21:14 +0200 Subject: [PATCH 07/17] Task updates and updates to SQLAlchemy Adapter --- cognee/api/v1/add/add.py | 1 + cognee/api/v1/cognify/cognify.py | 308 ------------------ .../sqlalchemy/SqlAlchemyAdapter.py | 65 ++-- .../data_summary/summarize_text_chunks.py | 37 --- .../data/processing/filter_affected_chunks.py | 25 -- .../data/processing/process_documents.py | 44 --- .../data/processing/remove_obsolete_chunks.py | 29 -- .../search/llm/get_relevant_summary.py | 1 - .../chunk_extract_summary.py | 2 +- .../models/TextSummary.py | 0 .../save_chunks_to_store}/__init__.py | 0 cognee/tests/test_library.py | 3 +- 12 files changed, 42 insertions(+), 473 deletions(-) delete mode 100644 cognee/api/v1/cognify/cognify.py delete mode 100644 cognee/modules/data/extraction/data_summary/summarize_text_chunks.py delete mode 100644 cognee/modules/data/processing/filter_affected_chunks.py delete mode 100644 cognee/modules/data/processing/process_documents.py delete mode 100644 cognee/modules/data/processing/remove_obsolete_chunks.py rename cognee/{modules/data/extraction/data_summary => tasks/chunk_extract_summary}/models/TextSummary.py (100%) rename cognee/{modules/data/extraction/data_summary => tasks/save_chunks_to_store}/__init__.py (100%) diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 90e25950..a1077ee2 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -128,6 +128,7 @@ async def data_resources(file_paths: str, user: User): data.mime_type = file_metadata["mime_type"] await session.merge(data) + await session.commit() else: data = Data( id = data_id, diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py deleted file mode 100644 index a11a2cb8..00000000 --- a/cognee/api/v1/cognify/cognify.py +++ /dev/null @@ -1,308 +0,0 @@ -import asyncio -from uuid import uuid4 -from typing import List, Union -import logging -import nltk -from asyncio import Lock -from nltk.corpus import stopwords - -from cognee.infrastructure.data.chunking.get_chunking_engine import get_chunk_engine -from cognee.infrastructure.databases.graph.config import get_graph_config -from cognee.infrastructure.databases.vector.embeddings.LiteLLMEmbeddingEngine import LiteLLMEmbeddingEngine -from cognee.modules.cognify.graph.add_node_connections import group_nodes_by_layer, \ - graph_ready_output, connect_nodes_in_graph -from cognee.modules.cognify.graph.add_data_chunks import add_data_chunks -from cognee.modules.cognify.graph.add_document_node import add_document_node -from cognee.modules.cognify.graph.add_classification_nodes import add_classification_nodes -from cognee.modules.cognify.graph.add_cognitive_layer_graphs import add_cognitive_layer_graphs -from cognee.modules.cognify.graph.add_summary_nodes import add_summary_nodes -from cognee.modules.cognify.llm.resolve_cross_graph_references import resolve_cross_graph_references -from cognee.infrastructure.databases.graph import get_graph_engine -from cognee.modules.cognify.graph.add_cognitive_layers import add_cognitive_layers -from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException -from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file -from cognee.modules.data.get_content_categories import get_content_categories -from cognee.modules.data.get_content_summary import get_content_summary -from 
cognee.modules.data.get_cognitive_layers import get_cognitive_layers -from cognee.modules.data.get_layer_graphs import get_layer_graphs -from cognee.shared.data_models import KnowledgeGraph, ChunkStrategy, ChunkEngine -from cognee.shared.utils import send_telemetry -from cognee.modules.tasks import create_task_status_table, update_task_status -from cognee.shared.SourceCodeGraph import SourceCodeGraph -from cognee.modules.tasks import get_task_status -from cognee.modules.data.operations.get_dataset_data import get_dataset_data -from cognee.infrastructure.data.chunking.config import get_chunk_config -from cognee.modules.cognify.config import get_cognify_config -from cognee.infrastructure.databases.relational import get_relational_engine - -USER_ID = "default_user" - -logger = logging.getLogger("cognify") - -update_status_lock = Lock() - -async def cognify(datasets: Union[str, List[str]] = None): - """This function is responsible for the cognitive processing of the content.""" - # Has to be loaded in advance, multithreading doesn't work without it. - nltk.download("stopwords", quiet=True) - stopwords.ensure_loaded() - await create_task_status_table() - - graph_client = await get_graph_engine() - - db_engine = get_relational_engine() - - if datasets is None or len(datasets) == 0: - datasets = await db_engine.get_datasets() - - awaitables = [] - - async def handle_cognify_task(dataset_name: str): - async with update_status_lock: - task_status = get_task_status([dataset_name]) - - if dataset_name in task_status and task_status[dataset_name] == "DATASET_PROCESSING_STARTED": - logger.info(f"Dataset {dataset_name} is being processed.") - return - - update_task_status(dataset_name, "DATASET_PROCESSING_STARTED") - - try: - await cognify(dataset_name) - update_task_status(dataset_name, "DATASET_PROCESSING_FINISHED") - except Exception as error: - update_task_status(dataset_name, "DATASET_PROCESSING_ERROR") - raise error - - - # datasets is a list of dataset names - if isinstance(datasets, list): - for dataset_name in datasets: - awaitables.append(handle_cognify_task(dataset_name)) - - graphs = await asyncio.gather(*awaitables) - return graphs[0] - - added_datasets = await db_engine.get_datasets() - - # datasets is a dataset name string - dataset_files = [] - dataset_name = datasets.replace(".", "_").replace(" ", "_") - - for added_dataset in added_datasets: - if dataset_name in added_dataset: - dataset_files.append((added_dataset, await get_dataset_data(dataset_name = added_dataset))) - - chunk_config = get_chunk_config() - chunk_engine = get_chunk_engine() - chunk_strategy = chunk_config.chunk_strategy - - async def process_batch(files_batch): - data_chunks = {} - - for dataset_name, file_metadata, document_id in files_batch: - with open(file_metadata["file_path"], "rb") as file: - try: - file_type = guess_file_type(file) - text = extract_text_from_file(file, file_type) - if text is None: - text = "empty file" - if text == "": - text = "empty file" - subchunks,_ = chunk_engine.chunk_data(chunk_strategy, text, chunk_config.chunk_size, chunk_config.chunk_overlap) - - if dataset_name not in data_chunks: - data_chunks[dataset_name] = [] - - for subchunk in subchunks: - data_chunks[dataset_name].append(dict( - document_id = document_id, - chunk_id = str(uuid4()), - text = subchunk, - file_metadata = file_metadata, - )) - - except FileTypeException: - logger.warning("File (%s) has an unknown file type. 
We are skipping it.", file_metadata["id"]) - - added_chunks = await add_data_chunks(data_chunks) - # await add_data_chunks_basic_rag(data_chunks) - - await asyncio.gather( - *[process_text( - chunk["collection"], - chunk["chunk_id"], - chunk["text"], - chunk["file_metadata"], - chunk["document_id"] - ) for chunk in added_chunks] - ) - - batch_size = 20 - file_count = 0 - files_batch = [] - - graph_config = get_graph_config() - - if graph_config.infer_graph_topology and graph_config.graph_topology_task: - from cognee.modules.topology.topology import TopologyEngine - topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) - await topology_engine.add_graph_topology(dataset_files=dataset_files) - elif not graph_config.infer_graph_topology: - from cognee.modules.topology.topology import TopologyEngine - topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) - await topology_engine.add_graph_topology(graph_config.topology_file_path) - elif not graph_config.graph_topology_task: - parent_node_id = f"DefaultGraphModel__{USER_ID}" - - - for (dataset_name, files) in dataset_files: - for file_metadata in files: - if parent_node_id: - document_id = await add_document_node( - graph_client, - parent_node_id = parent_node_id, - document_metadata = file_metadata, - ) - else: - document_id = await add_document_node( - graph_client, - parent_node_id = file_metadata["id"], - document_metadata = file_metadata, - ) - - files_batch.append((dataset_name, file_metadata, document_id)) - file_count += 1 - - if file_count >= batch_size: - await process_batch(files_batch) - files_batch = [] - file_count = 0 - - # Process any remaining files in the last batch - if len(files_batch) > 0: - await process_batch(files_batch) - - return graph_client.graph - - -async def process_text(chunk_collection: str, chunk_id: str, input_text: str, file_metadata: dict, document_id: str): - print(f"Processing chunk ({chunk_id}) from document ({file_metadata['id']}).") - - graph_config = get_graph_config() - graph_client = await get_graph_engine() - graph_topology = graph_config.graph_model - - if graph_topology == SourceCodeGraph: - classified_categories = [{"data_type": "text", "category_name": "Code and functions"}] - elif graph_topology == KnowledgeGraph: - classified_categories = await get_content_categories(input_text) - else: - classified_categories = [{"data_type": "text", "category_name": "Unclassified text"}] - - await add_classification_nodes( - graph_client, - parent_node_id = document_id, - categories = classified_categories, - ) - print(f"Chunk ({chunk_id}) classified.") - - content_summary = await get_content_summary(input_text) - await add_summary_nodes(graph_client, document_id, content_summary) - print(f"Chunk ({chunk_id}) summarized.") - - cognify_config = get_cognify_config() - - cognitive_layers = await get_cognitive_layers(input_text, classified_categories) - cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit] - - try: - cognitive_layers = (await add_cognitive_layers(graph_client, document_id, cognitive_layers))[:2] - print("cognitive_layers", cognitive_layers) - layer_graphs = await get_layer_graphs(input_text, cognitive_layers) - await add_cognitive_layer_graphs(graph_client, chunk_collection, chunk_id, layer_graphs) - except: - pass - - - if cognify_config.connect_documents is True: - db_engine = get_relational_engine() - relevant_documents_to_connect = db_engine.fetch_cognify_data(excluded_document_id = document_id) - - list_of_nodes = [] - - 
relevant_documents_to_connect.append({ - "layer_id": document_id, - }) - - for document in relevant_documents_to_connect: - node_descriptions_to_match = await graph_client.extract_node_description(document["layer_id"]) - list_of_nodes.extend(node_descriptions_to_match) - - nodes_by_layer = await group_nodes_by_layer(list_of_nodes) - - results = await resolve_cross_graph_references(nodes_by_layer) - - relationships = graph_ready_output(results) - - await connect_nodes_in_graph( - graph_client, - relationships, - score_threshold = cognify_config.intra_layer_score_treshold - ) - - send_telemetry("cognee.cognify") - - print(f"Chunk ({chunk_id}) cognified.") - - - -if __name__ == "__main__": - - async def test(): - # await prune.prune_system() - # # - # from cognee.api.v1.add import add - # data_directory_path = os.path.abspath("../../.data") - # # print(data_directory_path) - # # config.data_root_directory(data_directory_path) - # # cognee_directory_path = os.path.abspath(".cognee_system") - # # config.system_root_directory(cognee_directory_path) - # - # await add("data://" +data_directory_path, "example") - - text = """Conservative PP in the lead in Spain, according to estimate - An estimate has been published for Spain: - - Opposition leader Alberto Núñez Feijóo’s conservative People’s party (PP): 32.4% - - Spanish prime minister Pedro Sánchez’s Socialist party (PSOE): 30.2% - - The far-right Vox party: 10.4% - - In Spain, the right has sought to turn the European election into a referendum on Sánchez. - - Ahead of the vote, public attention has focused on a saga embroiling the prime minister’s wife, Begoña Gómez, who is being investigated over allegations of corruption and influence-peddling, which Sanchez has dismissed as politically-motivated and totally baseless.""" - - from cognee.api.v1.add import add - - await add([text], "example_dataset") - - from cognee.api.v1.config.config import config - config.set_chunk_engine(ChunkEngine.LANGCHAIN_ENGINE ) - config.set_chunk_strategy(ChunkStrategy.LANGCHAIN_CHARACTER) - config.embedding_engine = LiteLLMEmbeddingEngine() - - graph = await cognify() - # vector_client = infrastructure_config.get_config("vector_engine") - # - # out = await vector_client.search(collection_name ="basic_rag", query_text="show_all_processes", limit=10) - # - # print("results", out) - # - # from cognee.shared.utils import render_graph - # - # await render_graph(graph, include_color=True, include_nodes=False, include_size=False) - - import asyncio - asyncio.run(test()) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index 799c79d8..0bec5cd9 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -13,7 +13,11 @@ def make_async_sessionmaker(sessionmaker): @asynccontextmanager async def async_session_maker(): await asyncio.sleep(0.1) - yield FakeAsyncSession(sessionmaker()) + session = FakeAsyncSession(sessionmaker()) + try: + yield session + finally: + await session.close() # Ensure the session is closed return async_session_maker @@ -26,28 +30,34 @@ def __init__(self, db_type: str, db_path: str, db_name: str, db_user: str, db_pa LocalStorage.ensure_directory_exists(db_path) self.engine = create_engine(f"duckdb:///{self.db_location}") - self.sessionmaker = make_async_sessionmaker(sessionmaker(bind = self.engine)) + self.sessionmaker = 
make_async_sessionmaker(sessionmaker(bind=self.engine)) else: self.engine = create_async_engine(f"postgresql+asyncpg://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}") - self.sessionmaker = async_sessionmaker(bind = self.engine, expire_on_commit = False) - + self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False) @asynccontextmanager async def get_async_session(self) -> AsyncGenerator[AsyncSession, None]: async_session_maker = self.sessionmaker async with async_session_maker() as session: - yield session + try: + yield session + finally: + await session.close() # Ensure the session is closed def get_session(self): session_maker = self.sessionmaker with session_maker() as session: - yield session + try: + yield session + finally: + session.close() # Ensure the session is closed async def get_datasets(self): from cognee.modules.data.models import Dataset async with self.get_async_session() as session: - datasets = (await session.execute(select(Dataset).options(joinedload(Dataset.data)))).unique().scalars().all() + result = await session.execute(select(Dataset).options(joinedload(Dataset.data))) + datasets = result.unique().scalars().all() return datasets async def create_table(self, schema_name: str, table_name: str, table_config: list[dict]): @@ -55,20 +65,21 @@ async def create_table(self, schema_name: str, table_name: str, table_config: li async with self.engine.begin() as connection: await connection.execute(text(f"CREATE SCHEMA IF NOT EXISTS {schema_name};")) await connection.execute(text(f"CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} ({', '.join(fields_query_parts)});")) + await connection.close() async def delete_table(self, table_name: str): - async with self.engine.connect() as connection: + async with self.engine.begin() as connection: await connection.execute(text(f"DROP TABLE IF EXISTS {table_name} CASCADE;")) - + await connection.close() async def insert_data(self, schema_name: str, table_name: str, data: list[dict]): columns = ", ".join(data[0].keys()) values = ", ".join([f"({', '.join([f':{key}' for key in row.keys()])})" for row in data]) insert_query = text(f"INSERT INTO {schema_name}.{table_name} ({columns}) VALUES {values};") - async with self.engine.connect() as connection: + async with self.engine.begin() as connection: await connection.execute(insert_query, data) - + await connection.close() async def get_data(self, table_name: str, filters: dict = None): - async with self.engine.connect() as connection: + async with self.engine.begin() as connection: query = f"SELECT * FROM {table_name}" if filters: filter_conditions = " AND ".join([ @@ -85,26 +96,26 @@ async def get_data(self, table_name: str, filters: dict = None): return {result["data_id"]: result["status"] for result in results} async def execute_query(self, query): - async with self.engine.connect() as connection: + async with self.engine.begin() as connection: result = await connection.execute(text(query)) return [dict(row) for row in result] - async def drop_tables(self, connection): - try: - await connection.execute(text("DROP TABLE IF EXISTS group_permission CASCADE")) - await connection.execute(text("DROP TABLE IF EXISTS permissions CASCADE")) - # Add more DROP TABLE statements for other tables as needed - print("Database tables dropped successfully.") - except Exception as e: - print(f"Error dropping database tables: {e}") + async def drop_tables(self): + async with self.engine.begin() as connection: + try: + await connection.execute(text("DROP TABLE IF EXISTS 
group_permission CASCADE")) + await connection.execute(text("DROP TABLE IF EXISTS permissions CASCADE")) + # Add more DROP TABLE statements for other tables as needed + print("Database tables dropped successfully.") + except Exception as e: + print(f"Error dropping database tables: {e}") async def delete_database(self): - async with self.engine.connect() as connection: + async with self.engine.begin() as connection: try: - async with connection.begin() as trans: - for table in Base.metadata.sorted_tables: - drop_table_query = text(f'DROP TABLE IF EXISTS {table.name} CASCADE') - await connection.execute(drop_table_query) + for table in Base.metadata.sorted_tables: + drop_table_query = text(f'DROP TABLE IF EXISTS {table.name} CASCADE') + await connection.execute(drop_table_query) print("Database deleted successfully.") except Exception as e: - print(f"Error deleting database: {e}") \ No newline at end of file + print(f"Error deleting database: {e}") diff --git a/cognee/modules/data/extraction/data_summary/summarize_text_chunks.py b/cognee/modules/data/extraction/data_summary/summarize_text_chunks.py deleted file mode 100644 index 922ece6c..00000000 --- a/cognee/modules/data/extraction/data_summary/summarize_text_chunks.py +++ /dev/null @@ -1,37 +0,0 @@ - -import asyncio -from typing import Type -from pydantic import BaseModel -from cognee.infrastructure.databases.vector import get_vector_engine, DataPoint -from ...processing.chunk_types.DocumentChunk import DocumentChunk -from ...extraction.extract_summary import extract_summary -from .models.TextSummary import TextSummary - -async def summarize_text_chunks(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel], collection_name: str = "summaries"): - if len(data_chunks) == 0: - return data_chunks - - chunk_summaries = await asyncio.gather( - *[extract_summary(chunk.text, summarization_model) for chunk in data_chunks] - ) - - vector_engine = get_vector_engine() - - await vector_engine.create_collection(collection_name, payload_schema = TextSummary) - - await vector_engine.create_data_points( - collection_name, - [ - DataPoint[TextSummary]( - id = str(chunk.chunk_id), - payload = dict( - chunk_id = str(chunk.chunk_id), - document_id = str(chunk.document_id), - text = chunk_summaries[chunk_index].summary, - ), - embed_field = "text", - ) for (chunk_index, chunk) in enumerate(data_chunks) - ], - ) - - return data_chunks diff --git a/cognee/modules/data/processing/filter_affected_chunks.py b/cognee/modules/data/processing/filter_affected_chunks.py deleted file mode 100644 index 599d16eb..00000000 --- a/cognee/modules/data/processing/filter_affected_chunks.py +++ /dev/null @@ -1,25 +0,0 @@ -from cognee.infrastructure.databases.vector import get_vector_engine -from .chunk_types import DocumentChunk - -async def filter_affected_chunks(data_chunks: list[DocumentChunk], collection_name: str) -> list[DocumentChunk]: - vector_engine = get_vector_engine() - - if not await vector_engine.has_collection(collection_name): - # If collection doesn't exist, all data_chunks are new - return data_chunks - - existing_chunks = await vector_engine.retrieve( - collection_name, - [str(chunk.chunk_id) for chunk in data_chunks], - ) - - existing_chunks_map = {chunk.id: chunk.payload for chunk in existing_chunks} - - affected_data_chunks = [] - - for chunk in data_chunks: - if chunk.chunk_id not in existing_chunks_map or \ - chunk.text != existing_chunks_map[chunk.chunk_id]["text"]: - affected_data_chunks.append(chunk) - - return affected_data_chunks diff 
--git a/cognee/modules/data/processing/process_documents.py b/cognee/modules/data/processing/process_documents.py deleted file mode 100644 index 8df8067b..00000000 --- a/cognee/modules/data/processing/process_documents.py +++ /dev/null @@ -1,44 +0,0 @@ -from cognee.infrastructure.databases.graph import get_graph_engine -from .document_types import Document - -async def process_documents(documents: list[Document], parent_node_id: str = None, user:str=None, user_permissions:str=None): - graph_engine = await get_graph_engine() - - nodes = [] - edges = [] - - if parent_node_id and await graph_engine.extract_node(parent_node_id) is None: - nodes.append((parent_node_id, {})) - - document_nodes = await graph_engine.extract_nodes([str(document.id) for document in documents]) - - for (document_index, document) in enumerate(documents): - document_node = document_nodes[document_index] if document_index in document_nodes else None - - if document_node is None: - document_dict = document.to_dict() - document_dict["user"] = user - document_dict["user_permissions"] = user_permissions - nodes.append((str(document.id), document.to_dict())) - - if parent_node_id: - edges.append(( - parent_node_id, - str(document.id), - "has_document", - dict( - relationship_name = "has_document", - source_node_id = parent_node_id, - target_node_id = str(document.id), - ), - )) - - if len(nodes) > 0: - await graph_engine.add_nodes(nodes) - await graph_engine.add_edges(edges) - - for document in documents: - document_reader = document.get_reader() - - for document_chunk in document_reader.read(max_chunk_size = 1024): - yield document_chunk diff --git a/cognee/modules/data/processing/remove_obsolete_chunks.py b/cognee/modules/data/processing/remove_obsolete_chunks.py deleted file mode 100644 index 1bc70a39..00000000 --- a/cognee/modules/data/processing/remove_obsolete_chunks.py +++ /dev/null @@ -1,29 +0,0 @@ - -from cognee.infrastructure.databases.graph import get_graph_engine -# from cognee.infrastructure.databases.vector import get_vector_engine -from .chunk_types import DocumentChunk - -async def remove_obsolete_chunks(data_chunks: list[DocumentChunk]) -> list[DocumentChunk]: - graph_engine = await get_graph_engine() - - document_ids = set((data_chunk.document_id for data_chunk in data_chunks)) - - obsolete_chunk_ids = [] - - for document_id in document_ids: - chunk_ids = await graph_engine.get_successor_ids(document_id, edge_label = "has_chunk") - - for chunk_id in chunk_ids: - previous_chunks = await graph_engine.get_predecessor_ids(chunk_id, edge_label = "next_chunk") - - if len(previous_chunks) == 0: - obsolete_chunk_ids.append(chunk_id) - - if len(obsolete_chunk_ids) > 0: - await graph_engine.delete_nodes(obsolete_chunk_ids) - - disconnected_nodes = await graph_engine.get_disconnected_nodes() - if len(disconnected_nodes) > 0: - await graph_engine.delete_nodes(disconnected_nodes) - - return data_chunks diff --git a/cognee/modules/search/llm/get_relevant_summary.py b/cognee/modules/search/llm/get_relevant_summary.py index 4de0a971..f5a3c8ef 100644 --- a/cognee/modules/search/llm/get_relevant_summary.py +++ b/cognee/modules/search/llm/get_relevant_summary.py @@ -4,7 +4,6 @@ from .extraction.categorize_relevant_summary import categorize_relevant_summary logger = logging.getLogger(__name__) - async def get_cognitive_layers(content: str, categories: List[Dict]): try: cognify_config = get_cognify_config() diff --git a/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py 
b/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py index ffb16f6d..8ebd5d5c 100644 --- a/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py +++ b/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py @@ -3,7 +3,7 @@ from typing import Type from pydantic import BaseModel from cognee.infrastructure.databases.vector import get_vector_engine, DataPoint -from cognee.modules.data.extraction.data_summary.models.TextSummary import TextSummary +from cognee.tasks.chunk_extract_summary.models.TextSummary import TextSummary from cognee.modules.data.extraction.extract_summary import extract_summary from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk diff --git a/cognee/modules/data/extraction/data_summary/models/TextSummary.py b/cognee/tasks/chunk_extract_summary/models/TextSummary.py similarity index 100% rename from cognee/modules/data/extraction/data_summary/models/TextSummary.py rename to cognee/tasks/chunk_extract_summary/models/TextSummary.py diff --git a/cognee/modules/data/extraction/data_summary/__init__.py b/cognee/tasks/save_chunks_to_store/__init__.py similarity index 100% rename from cognee/modules/data/extraction/data_summary/__init__.py rename to cognee/tasks/save_chunks_to_store/__init__.py diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index 1480020d..b19d63d6 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -6,6 +6,7 @@ logging.basicConfig(level = logging.DEBUG) async def main(): + data_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_library")).resolve()) cognee.config.data_root_directory(data_directory_path) cognee_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_library")).resolve()) @@ -69,4 +70,4 @@ async def main(): if __name__ == "__main__": import asyncio - asyncio.run(main()) + asyncio.run(main(), debug=True) From 85160da3876ae7baafa7c9cd91ab24993fd32758 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 8 Aug 2024 13:37:55 +0200 Subject: [PATCH 08/17] Refactor of the tasks --- cognee/api/v1/cognify/cognify_v2.py | 32 +- .../classification/classify_text_chunks.py | 152 -------- cognee/modules/cognify/vector/__init__.py | 1 - .../cognify/vector/save_data_chunks.py | 97 ------ .../establish_graph_topology.py | 20 -- .../knowledge_graph/expand_knowledge_graph.py | 218 ------------ .../extract_knowledge_graph.py | 4 - .../extract_knowledge_graph_module.py | 91 ----- cognee/modules/topology/__init__.py | 0 cognee/modules/topology/example_data.json | 25 -- .../062c22df-d99b-599f-90cd-2d325c8bcf69.txt | 6 - .../6dfe01b6-07d2-5b77-83c8-1d6c11ce2aa7.txt | 16 - .../Natural language processing.txt | 2 - .../bab90046-1d9b-598c-8711-dab30f501915.txt | 1 - .../modules/topology/extraction/__init__.py | 0 .../topology/extraction/extract_topology.py | 14 - .../modules/topology/infer_data_topology.py | 20 -- .../document_to_ontology}/__init__.py | 0 .../document_to_ontology.py} | 58 +++- .../document_to_ontology/models/models.py} | 0 docs/index.md | 325 +++++++++++++++++- 21 files changed, 392 insertions(+), 690 deletions(-) delete mode 100644 cognee/modules/classification/classify_text_chunks.py delete mode 100644 cognee/modules/cognify/vector/__init__.py delete mode 100644 cognee/modules/cognify/vector/save_data_chunks.py delete mode 100644 cognee/modules/data/extraction/knowledge_graph/establish_graph_topology.py delete mode 100644 
cognee/modules/data/extraction/knowledge_graph/expand_knowledge_graph.py delete mode 100644 cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph.py delete mode 100644 cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph_module.py delete mode 100644 cognee/modules/topology/__init__.py delete mode 100644 cognee/modules/topology/example_data.json delete mode 100644 cognee/modules/topology/explanations/062c22df-d99b-599f-90cd-2d325c8bcf69.txt delete mode 100644 cognee/modules/topology/explanations/6dfe01b6-07d2-5b77-83c8-1d6c11ce2aa7.txt delete mode 100644 cognee/modules/topology/explanations/Natural language processing.txt delete mode 100644 cognee/modules/topology/explanations/bab90046-1d9b-598c-8711-dab30f501915.txt delete mode 100644 cognee/modules/topology/extraction/__init__.py delete mode 100644 cognee/modules/topology/extraction/extract_topology.py delete mode 100644 cognee/modules/topology/infer_data_topology.py rename cognee/{modules/classification => tasks/document_to_ontology}/__init__.py (100%) rename cognee/{modules/topology/topology.py => tasks/document_to_ontology/document_to_ontology.py} (74%) rename cognee/{modules/topology/topology_data_models.py => tasks/document_to_ontology/models/models.py} (100%) diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index b2a03ebf..497815dd 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -9,14 +9,6 @@ from cognee.modules.data.processing.document_types.ImageDocument import ImageDocument from cognee.shared.data_models import KnowledgeGraph from cognee.modules.data.processing.document_types import PdfDocument, TextDocument -# from cognee.modules.cognify.vector import save_data_chunks -# from cognee.modules.data.processing.process_documents import process_documents -# from cognee.modules.classification.classify_text_chunks import classify_text_chunks -# from cognee.modules.data.extraction.data_summary.summarize_text_chunks import summarize_text_chunks -# from cognee.modules.data.processing.filter_affected_chunks import filter_affected_chunks -# from cognee.modules.data.processing.remove_obsolete_chunks import remove_obsolete_chunks -# from cognee.modules.data.extraction.knowledge_graph.expand_knowledge_graph import expand_knowledge_graph -# from cognee.modules.data.extraction.knowledge_graph.establish_graph_topology import establish_graph_topology from cognee.modules.data.models import Dataset, Data from cognee.modules.data.operations.get_dataset_data import get_dataset_data from cognee.modules.data.operations.retrieve_datasets import retrieve_datasets @@ -31,6 +23,7 @@ from cognee.tasks.chunk_naive_llm_classifier.chunk_naive_llm_classifier import chunk_naive_llm_classifier_task from cognee.tasks.chunk_remove_disconnected.chunk_remove_disconnected import chunk_remove_disconnected_task from cognee.tasks.chunk_to_graph_decomposition.chunk_to_graph_decomposition import chunk_to_graph_decomposition_task +from cognee.tasks.document_to_ontology.document_to_ontology import document_to_ontology from cognee.tasks.save_chunks_to_store.save_chunks_to_store import save_chunks_to_store_task from cognee.tasks.chunk_update_check.chunk_update_check import chunk_update_check_task from cognee.tasks.chunks_into_graph.chunks_into_graph import \ @@ -96,19 +89,20 @@ async def run_cognify_pipeline(dataset: Dataset): cognee_config = get_cognify_config() graph_config = get_graph_config() root_node_id = None - - if graph_config.infer_graph_topology and 
graph_config.graph_topology_task: - from cognee.modules.topology.topology import TopologyEngine - topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) - root_node_id = await topology_engine.add_graph_topology(files = data) - elif graph_config.infer_graph_topology and not graph_config.infer_graph_topology: - from cognee.modules.topology.topology import TopologyEngine - topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) - await topology_engine.add_graph_topology(graph_config.topology_file_path) - elif not graph_config.graph_topology_task: - root_node_id = "ROOT" + # + # if graph_config.infer_graph_topology and graph_config.graph_topology_task: + # from cognee.modules.topology.topology import TopologyEngine + # topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) + # root_node_id = await topology_engine.add_graph_topology(files = data) + # elif graph_config.infer_graph_topology and not graph_config.infer_graph_topology: + # from cognee.modules.topology.topology import TopologyEngine + # topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) + # await topology_engine.add_graph_topology(graph_config.topology_file_path) + # elif not graph_config.graph_topology_task: + # root_node_id = "ROOT" tasks = [ + Task(document_to_ontology, root_node_id = root_node_id), Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type Task(chunk_to_graph_decomposition_task, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data Task(chunks_into_graph_task, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes diff --git a/cognee/modules/classification/classify_text_chunks.py b/cognee/modules/classification/classify_text_chunks.py deleted file mode 100644 index 1e6be7e8..00000000 --- a/cognee/modules/classification/classify_text_chunks.py +++ /dev/null @@ -1,152 +0,0 @@ - -import asyncio -from uuid import uuid5, NAMESPACE_OID -from typing import Type -from pydantic import BaseModel -from cognee.infrastructure.databases.graph import get_graph_engine -from cognee.infrastructure.databases.vector import get_vector_engine, DataPoint -from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk -from ..data.extraction.extract_categories import extract_categories - -async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classification_model: Type[BaseModel]): - if len(data_chunks) == 0: - return data_chunks - - chunk_classifications = await asyncio.gather( - *[extract_categories(chunk.text, classification_model) for chunk in data_chunks], - ) - - classification_data_points = [] - - for chunk_index, chunk in enumerate(data_chunks): - chunk_classification = chunk_classifications[chunk_index] - classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type)) - classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type)) - - for classification_subclass in chunk_classification.label.subclass: - classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value)) - - vector_engine = get_vector_engine() - - class Keyword(BaseModel): - uuid: str - text: str - chunk_id: str - document_id: str - - collection_name = "classification" - - if await 
vector_engine.has_collection(collection_name): - existing_data_points = await vector_engine.retrieve( - collection_name, - list(set(classification_data_points)), - ) if len(classification_data_points) > 0 else [] - - existing_points_map = {point.id: True for point in existing_data_points} - else: - existing_points_map = {} - await vector_engine.create_collection(collection_name, payload_schema = Keyword) - - data_points = [] - nodes = [] - edges = [] - - for (chunk_index, data_chunk) in enumerate(data_chunks): - chunk_classification = chunk_classifications[chunk_index] - classification_type_label = chunk_classification.label.type - classification_type_id = uuid5(NAMESPACE_OID, classification_type_label) - - if classification_type_id not in existing_points_map: - data_points.append( - DataPoint[Keyword]( - id = str(classification_type_id), - payload = Keyword.parse_obj({ - "uuid": str(classification_type_id), - "text": classification_type_label, - "chunk_id": str(data_chunk.chunk_id), - "document_id": str(data_chunk.document_id), - }), - embed_field = "text", - ) - ) - - nodes.append(( - str(classification_type_id), - dict( - id = str(classification_type_id), - name = classification_type_label, - type = classification_type_label, - ) - )) - existing_points_map[classification_type_id] = True - - edges.append(( - str(data_chunk.chunk_id), - str(classification_type_id), - "is_media_type", - dict( - relationship_name = "is_media_type", - source_node_id = str(data_chunk.chunk_id), - target_node_id = str(classification_type_id), - ), - )) - - for classification_subclass in chunk_classification.label.subclass: - classification_subtype_label = classification_subclass.value - classification_subtype_id = uuid5(NAMESPACE_OID, classification_subtype_label) - - if classification_subtype_id not in existing_points_map: - data_points.append( - DataPoint[Keyword]( - id = str(classification_subtype_id), - payload = Keyword.parse_obj({ - "uuid": str(classification_subtype_id), - "text": classification_subtype_label, - "chunk_id": str(data_chunk.chunk_id), - "document_id": str(data_chunk.document_id), - }), - embed_field = "text", - ) - ) - - nodes.append(( - str(classification_subtype_id), - dict( - id = str(classification_subtype_id), - name = classification_subtype_label, - type = classification_subtype_label, - ) - )) - edges.append(( - str(classification_subtype_id), - str(classification_type_id), - "is_subtype_of", - dict( - relationship_name = "contains", - source_node_id = str(classification_type_id), - target_node_id = str(classification_subtype_id), - ), - )) - - existing_points_map[classification_subtype_id] = True - - edges.append(( - str(data_chunk.chunk_id), - str(classification_subtype_id), - "is_classified_as", - dict( - relationship_name = "is_classified_as", - source_node_id = str(data_chunk.chunk_id), - target_node_id = str(classification_subtype_id), - ), - )) - - if len(nodes) > 0 or len(edges) > 0: - await vector_engine.create_data_points(collection_name, data_points) - - graph_engine = await get_graph_engine() - - await graph_engine.add_nodes(nodes) - await graph_engine.add_edges(edges) - - return data_chunks diff --git a/cognee/modules/cognify/vector/__init__.py b/cognee/modules/cognify/vector/__init__.py deleted file mode 100644 index 4edc5427..00000000 --- a/cognee/modules/cognify/vector/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .save_data_chunks import save_data_chunks diff --git a/cognee/modules/cognify/vector/save_data_chunks.py 
b/cognee/modules/cognify/vector/save_data_chunks.py deleted file mode 100644 index 943ba336..00000000 --- a/cognee/modules/cognify/vector/save_data_chunks.py +++ /dev/null @@ -1,97 +0,0 @@ -from cognee.infrastructure.databases.vector import DataPoint, get_vector_engine -from cognee.infrastructure.databases.graph import get_graph_engine -from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk - -async def save_data_chunks(data_chunks: list[DocumentChunk], collection_name: str): - if len(data_chunks) == 0: - return data_chunks - - vector_engine = get_vector_engine() - graph_engine = await get_graph_engine() - - # Remove and unlink existing chunks - if await vector_engine.has_collection(collection_name): - existing_chunks = [DocumentChunk.parse_obj(chunk.payload) for chunk in (await vector_engine.retrieve( - collection_name, - [str(chunk.chunk_id) for chunk in data_chunks], - ))] - - if len(existing_chunks) > 0: - await vector_engine.delete_data_points(collection_name, [str(chunk.chunk_id) for chunk in existing_chunks]) - - await graph_engine.remove_connection_to_successors_of([chunk.chunk_id for chunk in existing_chunks], "next_chunk") - await graph_engine.remove_connection_to_predecessors_of([chunk.chunk_id for chunk in existing_chunks], "has_chunk") - else: - await vector_engine.create_collection(collection_name, payload_schema = DocumentChunk) - - # Add to vector storage - await vector_engine.create_data_points( - collection_name, - [ - DataPoint[DocumentChunk]( - id = str(chunk.chunk_id), - payload = chunk, - embed_field = "text", - ) for chunk in data_chunks - ], - ) - - # Add to graph storage - chunk_nodes = [] - chunk_edges = [] - - for chunk in data_chunks: - chunk_nodes.append(( - str(chunk.chunk_id), - dict( - id = str(chunk.chunk_id), - chunk_id = str(chunk.chunk_id), - document_id = str(chunk.document_id), - word_count = chunk.word_count, - chunk_index = chunk.chunk_index, - cut_type = chunk.cut_type, - pages = chunk.pages, - ) - )) - - chunk_edges.append(( - str(chunk.document_id), - str(chunk.chunk_id), - "has_chunk", - dict( - relationship_name = "has_chunk", - source_node_id = str(chunk.document_id), - target_node_id = str(chunk.chunk_id), - ), - )) - - previous_chunk_id = get_previous_chunk_id(data_chunks, chunk) - - if previous_chunk_id is not None: - chunk_edges.append(( - str(previous_chunk_id), - str(chunk.chunk_id), - "next_chunk", - dict( - relationship_name = "next_chunk", - source_node_id = str(previous_chunk_id), - target_node_id = str(chunk.chunk_id), - ), - )) - - await graph_engine.add_nodes(chunk_nodes) - await graph_engine.add_edges(chunk_edges) - - return data_chunks - - -def get_previous_chunk_id(document_chunks: list[DocumentChunk], current_chunk: DocumentChunk) -> DocumentChunk: - if current_chunk.chunk_index == 0: - return current_chunk.document_id - - for chunk in document_chunks: - if str(chunk.document_id) == str(current_chunk.document_id) \ - and chunk.chunk_index == current_chunk.chunk_index - 1: - return chunk.chunk_id - - return None diff --git a/cognee/modules/data/extraction/knowledge_graph/establish_graph_topology.py b/cognee/modules/data/extraction/knowledge_graph/establish_graph_topology.py deleted file mode 100644 index b2141a50..00000000 --- a/cognee/modules/data/extraction/knowledge_graph/establish_graph_topology.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Type -from pydantic import BaseModel -from cognee.shared.data_models import KnowledgeGraph -from cognee.infrastructure.databases.graph import 
get_graph_engine -from ...processing.chunk_types.DocumentChunk import DocumentChunk -from .add_model_class_to_graph import add_model_class_to_graph - -async def chunk_to_graph_decomposition(data_chunks: list[DocumentChunk], topology_model: Type[BaseModel]): - if topology_model == KnowledgeGraph: - return data_chunks - - graph_engine = await get_graph_engine() - - await add_model_class_to_graph(topology_model, graph_engine) - - return data_chunks - - -def generate_node_id(node_id: str) -> str: - return node_id.upper().replace(" ", "_").replace("'", "") diff --git a/cognee/modules/data/extraction/knowledge_graph/expand_knowledge_graph.py b/cognee/modules/data/extraction/knowledge_graph/expand_knowledge_graph.py deleted file mode 100644 index 0a959df5..00000000 --- a/cognee/modules/data/extraction/knowledge_graph/expand_knowledge_graph.py +++ /dev/null @@ -1,218 +0,0 @@ -import json -import asyncio -from uuid import uuid5, NAMESPACE_OID -from datetime import datetime, timezone -from typing import Type -from pydantic import BaseModel -from cognee.infrastructure.databases.graph import get_graph_engine -from cognee.infrastructure.databases.vector import DataPoint, get_vector_engine -from ...processing.chunk_types.DocumentChunk import DocumentChunk -from .extract_knowledge_graph import extract_content_graph - -class EntityNode(BaseModel): - uuid: str - name: str - type: str - description: str - created_at: datetime - updated_at: datetime - -async def expand_knowledge_graph(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel], collection_name: str): - chunk_graphs = await asyncio.gather( - *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks] - ) - - vector_engine = get_vector_engine() - graph_engine = await get_graph_engine() - - has_collection = await vector_engine.has_collection(collection_name) - - if not has_collection: - await vector_engine.create_collection(collection_name, payload_schema = EntityNode) - - processed_nodes = {} - type_node_edges = [] - entity_node_edges = [] - type_entity_edges = [] - - for (chunk_index, chunk) in enumerate(data_chunks): - chunk_graph = chunk_graphs[chunk_index] - for node in chunk_graph.nodes: - type_node_id = generate_node_id(node.type) - entity_node_id = generate_node_id(node.id) - - if type_node_id not in processed_nodes: - type_node_edges.append((str(chunk.chunk_id), type_node_id, "contains_entity_type")) - processed_nodes[type_node_id] = True - - if entity_node_id not in processed_nodes: - entity_node_edges.append((str(chunk.chunk_id), entity_node_id, "contains_entity")) - type_entity_edges.append((entity_node_id, type_node_id, "is_entity_type")) - processed_nodes[entity_node_id] = True - - graph_node_edges = [ - (edge.source_node_id, edge.target_node_id, edge.relationship_name) \ - for edge in chunk_graph.edges - ] - - existing_edges = await graph_engine.has_edges([ - *type_node_edges, - *entity_node_edges, - *type_entity_edges, - *graph_node_edges, - ]) - - existing_edges_map = {} - existing_nodes_map = {} - - for edge in existing_edges: - existing_edges_map[edge[0] + edge[1] + edge[2]] = True - existing_nodes_map[edge[0]] = True - - graph_nodes = [] - graph_edges = [] - data_points = [] - - for (chunk_index, chunk) in enumerate(data_chunks): - graph = chunk_graphs[chunk_index] - if graph is None: - continue - - for node in graph.nodes: - node_id = generate_node_id(node.id) - node_name = generate_name(node.name) - - type_node_id = generate_node_id(node.type) - type_node_name = generate_name(node.type) - - if node_id not 
in existing_nodes_map: - node_data = dict( - uuid = node_id, - name = node_name, - type = node_name, - description = node.description, - created_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), - updated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), - ) - - graph_nodes.append(( - node_id, - dict( - **node_data, - properties = json.dumps(node.properties), - ) - )) - - data_points.append(DataPoint[EntityNode]( - id = str(uuid5(NAMESPACE_OID, node_id)), - payload = node_data, - embed_field = "name", - )) - - existing_nodes_map[node_id] = True - - edge_key = str(chunk.chunk_id) + node_id + "contains_entity" - - if edge_key not in existing_edges_map: - graph_edges.append(( - str(chunk.chunk_id), - node_id, - "contains_entity", - dict( - relationship_name = "contains_entity", - source_node_id = str(chunk.chunk_id), - target_node_id = node_id, - ), - )) - - # Add relationship between entity type and entity itself: "Jake is Person" - graph_edges.append(( - node_id, - type_node_id, - "is_entity_type", - dict( - relationship_name = "is_entity_type", - source_node_id = type_node_id, - target_node_id = node_id, - ), - )) - - existing_edges_map[edge_key] = True - - if type_node_id not in existing_nodes_map: - type_node_data = dict( - uuid = type_node_id, - name = type_node_name, - type = type_node_id, - description = type_node_name, - created_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), - updated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"), - ) - - graph_nodes.append((type_node_id, dict( - **type_node_data, - properties = json.dumps(node.properties) - ))) - - data_points.append(DataPoint[EntityNode]( - id = str(uuid5(NAMESPACE_OID, type_node_id)), - payload = type_node_data, - embed_field = "name", - )) - - existing_nodes_map[type_node_id] = True - - edge_key = str(chunk.chunk_id) + type_node_id + "contains_entity_type" - - if edge_key not in existing_edges_map: - graph_edges.append(( - str(chunk.chunk_id), - type_node_id, - "contains_entity_type", - dict( - relationship_name = "contains_entity_type", - source_node_id = str(chunk.chunk_id), - target_node_id = type_node_id, - ), - )) - - existing_edges_map[edge_key] = True - - # Add relationship that came from graphs. 
- for edge in graph.edges: - source_node_id = generate_node_id(edge.source_node_id) - target_node_id = generate_node_id(edge.target_node_id) - relationship_name = generate_name(edge.relationship_name) - edge_key = source_node_id + target_node_id + relationship_name - - if edge_key not in existing_edges_map: - graph_edges.append(( - generate_node_id(edge.source_node_id), - generate_node_id(edge.target_node_id), - edge.relationship_name, - dict( - relationship_name = generate_name(edge.relationship_name), - source_node_id = generate_node_id(edge.source_node_id), - target_node_id = generate_node_id(edge.target_node_id), - properties = json.dumps(edge.properties), - ), - )) - existing_edges_map[edge_key] = True - - if len(data_points) > 0: - await vector_engine.create_data_points(collection_name, data_points) - - if len(graph_nodes) > 0: - await graph_engine.add_nodes(graph_nodes) - - if len(graph_edges) > 0: - await graph_engine.add_edges(graph_edges) - - return data_chunks - - -def generate_name(name: str) -> str: - return name.lower().replace(" ", "_").replace("'", "") - -def generate_node_id(node_id: str) -> str: - return node_id.lower().replace(" ", "_").replace("'", "") diff --git a/cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph.py b/cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph.py deleted file mode 100644 index 6cedb9d5..00000000 --- a/cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph.py +++ /dev/null @@ -1,4 +0,0 @@ -from .extract_content_graph import extract_content_graph - -async def extract_knowledge_graph(text: str, cognitive_layer, graph_model): - return await extract_content_graph(text, cognitive_layer, graph_model) diff --git a/cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph_module.py b/cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph_module.py deleted file mode 100644 index 9e4b3849..00000000 --- a/cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph_module.py +++ /dev/null @@ -1,91 +0,0 @@ -from typing import List -import dspy -import nltk -from nltk.corpus import stopwords -from nltk.tokenize import word_tokenize -from cognee.infrastructure.llm import get_llm_config -from cognee.shared.data_models import KnowledgeGraph, Node, Edge -from cognee.shared.utils import trim_text_to_max_tokens - -# """Instructions: -# You are a top-tier algorithm designed for extracting information from text in structured formats to build a knowledge graph. -# - **Nodes** represent entities and concepts. They're akin to Wikipedia nodes. -# - **Edges** represent relationships between concepts. They're akin to Wikipedia links. -# Extract as much information as you can from the text and build a detailed knowledge graph. -# If question is provided, make sure that the information to answer the question is present in the graph.""" - -class GraphFromText(dspy.Signature): - """Instructions: - You are a top-tier algorithm designed for extracting information from text in structured formats to build a knowledge graph. - - **Nodes** represent entities and concepts, akin to Wikipedia nodes. - - **Edges** represent relationships between entities and concepts, akin to Wikipedia hyperlinks. 
- Extract information from the text and build a detailed knowledge graph.""" - - text: str = dspy.InputField() - graph: KnowledgeGraph = dspy.OutputField() - - -def are_all_nodes_and_edges_valid(graph: KnowledgeGraph) -> bool: - return all([getattr(node, "type", "").strip() != "" for node in graph.nodes]) and \ - all([getattr(node, "name", "").strip() != "" for node in graph.nodes]) and \ - all([getattr(edge, "relationship_name", "").strip() != "" for edge in graph.edges]) - -def is_node_connected(node: Node, edges: List[Edge]) -> bool: - return any([(edge.source_node_id == node.id or edge.target_node_id == node.id) for edge in edges]) - -def are_all_nodes_connected(graph: KnowledgeGraph) -> bool: - return all([is_node_connected(node, graph.edges) for node in graph.nodes]) - - -class ExtractKnowledgeGraph(dspy.Module): - llm_config = get_llm_config() - - def __init__(self, lm = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)): - super().__init__() - self.lm = lm - dspy.settings.configure(lm=self.lm) - self.generate_graph = dspy.TypedChainOfThought(GraphFromText) - nltk.download("stopwords", quiet = True) - - def forward(self, context: str, question: str): - context = remove_stop_words(context) - context = trim_text_to_max_tokens(context, 1500, self.llm_config.llm_model) - - with dspy.context(lm = self.lm): - graph = self.generate_graph(text = context).graph - - not_valid_nodes_or_edges_message = """ - All nodes must contain "name". - All edges must contain "relationship_name". - Please add mandatory fields to nodes and edges.""" - - dspy.Suggest(are_all_nodes_and_edges_valid(graph), not_valid_nodes_or_edges_message) - - # not_connected_graph_message = """ - # Output must be a graph that has all nodes connected to it. - # Please find a relation and connect nodes or remove them.""" - - # dspy.Suggest(are_all_nodes_connected(graph), not_connected_graph_message) - - return dspy.Prediction(context = context, graph = graph) - - -def remove_stop_words(text): - stop_words = set(stopwords.words("english")) - word_tokens = word_tokenize(text) - filtered_text = [word for word in word_tokens if word.lower() not in stop_words] - return " ".join(filtered_text) - -# -# if __name__ == "__main__": -# gpt_4_turbo = dspy.OpenAI(model="gpt-4", max_tokens=4000, api_key=config.llm_api_key, model_type="chat") -# dspy.settings.configure(lm=gpt_4_turbo) - - -# extract_knowledge_graph = ExtractKnowledgeGraph(lm=gpt_4_turbo) -# # graph_text = extract_knowledge_graph("cognitive_layer", "text") -# graph = extract_knowledge_graph("analysis_layer", """A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification. LLMs acquire these abilities by learning statistical relationships from text documents during a computationally intensive self-supervised and semi-supervised training process. LLMs can be used for text generation, a form of generative AI, by taking an input text and repeatedly predicting the next token or word. -# LLMs are artificial neural networks. 
The largest and most capable, as of March 2024""", question="What is a large language model?") -# print("GPT4 History:", gpt_4_turbo.inspect_history(n=1)) -# print(graph) -# diff --git a/cognee/modules/topology/__init__.py b/cognee/modules/topology/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/cognee/modules/topology/example_data.json b/cognee/modules/topology/example_data.json deleted file mode 100644 index 7e802916..00000000 --- a/cognee/modules/topology/example_data.json +++ /dev/null @@ -1,25 +0,0 @@ -[ - { - "node_id": "062c22dfd99b599f90cd2d325c8bcf69", - "name": "062c22df-d99b-599f-90cd-2d325c8bcf69", - "default_relationship": { - "type": "related_to", - "source": "062c22dfd99b599f90cd2d325c8bcf69", - "target": "6dfe01b607d25b7783c81d6c11ce2aa7" - }, - "children": [ - { - "node_id": "6dfe01b607d25b7783c81d6c11ce2aa7", - "name": "6dfe01b6-07d2-5b77-83c8-1d6c11ce2aa7", - "default_relationship": { - "type": "related_to", - "source": "6dfe01b6-07d2-5b77-83c8-1d6c11ce2aa7", - "target": "a27bb4fa897e53a594cab446e1d33dbf" - }, - "children": [] - } - ] - } -] - - diff --git a/cognee/modules/topology/explanations/062c22df-d99b-599f-90cd-2d325c8bcf69.txt b/cognee/modules/topology/explanations/062c22df-d99b-599f-90cd-2d325c8bcf69.txt deleted file mode 100644 index 48e82306..00000000 --- a/cognee/modules/topology/explanations/062c22df-d99b-599f-90cd-2d325c8bcf69.txt +++ /dev/null @@ -1,6 +0,0 @@ -A quantum computer is a computer that takes advantage of quantum mechanical phenomena. -At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. -Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. -The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. -Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. 
Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. -In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. diff --git a/cognee/modules/topology/explanations/6dfe01b6-07d2-5b77-83c8-1d6c11ce2aa7.txt b/cognee/modules/topology/explanations/6dfe01b6-07d2-5b77-83c8-1d6c11ce2aa7.txt deleted file mode 100644 index 783ee55f..00000000 --- a/cognee/modules/topology/explanations/6dfe01b6-07d2-5b77-83c8-1d6c11ce2aa7.txt +++ /dev/null @@ -1,16 +0,0 @@ -U analizi komunikacionih sistema kod životinja zadržaćemo se samo na semiotičkim problemima – postoje li u pojedinim sistemima njihove komunikacije ZNACI, semiotički SISTEMI i neke semiotičke OPERACIJE, u onom smislu kako su ti pojmovi definisani i utvrđeni kod ljudi. Analiziraćemo sličnosti i razlike između komunikacije kod životinja i kod ljudi, posebno semiotičke komunikacije kod čoveka. -Kada se ima u vidu bogatstvo oblika komunikativnih veza među životinjama: sva raznolikost signala u pogledu fizičkih svojstava – hemijski, oflaktivni (mirisni), akustički (uključiv i ultrazvukove), električni, motorički (kinezički), proksemički (položaj u prostoru), vizuelni i drugi, zatim – raznovrsnost kanala (sredina) kroz koje se ostvaruje veza, kao i raznovrsnost funkcija koje imaju komunikativni sistemi, pitanje je koliko je uopšte opravdano govoriti o komunikaciji životinja u celini. -Međutim, kada se pristupi semiotičkoj analizi sistema komunikacije među životinjama, iza raznolikosti nalazi se prilična jednoličnost, čak tolika da se ne može utvrditi postoji li nekakvo usavršavanje sistema komunikacije duž evolucione lestvice. -Pogledajmo najpre kakve FUNKCIJE opslužuju sistemi komunikacija kod životinja. Poznati istraživač ovih problema, Marler, ovako rezimira analizu komunikacije među nižim i višim majmunima: „U velikoj većini, celokupni sistem komunikacije izgleda postoji radi organizacije socijalnog ponašanja grupe, regulacije dominantnosti i subordinacije, održanja mira i kohezije u grupi, kao i radi reprodukcije i brige o mladima (Marleu, 1967). Pomenute funkcije mogle bi se, nešto raščlanjenije, ovako opisati: -Postoje, najpre, kod nekih vrsta signali za identifikaciju pojedinaca (npr. parovi mužjaka i ženki kod ptica pevačica mogu ostati u stalnoj vezi tokom cele jedne godine i međusobno se identifikuju pomoću pevanja, ponekada u vidu dueta koji može izvesti samo određeni par) ili za identifikaciju vrste (npr. pčele-stražari ubijaju na ulazu u košnicu svaku jedinku koja se na osnovu signala ne može identifikovati kao član tog pčelinjeg društva). 
-Najbrojniji i najraznovrsniji su signali koji saopštavaju o motivacionim i afektivnim stanjima jedinke i o promenama tih stanja, a često i o suptilnim nijansama raspoloženja. Ta vrsta signala kazuje o gladi, seksualnim potrebama, ugroženostistrahu, boli, uzbuđenju, naklonosti, neprijateljstvu i agresivnosti, o zadovoljstvu i o svim varijacijama ovakvih motivacionih stanja i raspoloženja. -Izuzetnu biološku vrednost imaju signali koji regulišu međusobne odnose jedinki ili odnose u grupi kao celini. Podsticanje, dozivanje i približavanje partnera, privlačenje i parenje, međusobno prepoznavanje i saradnja između roditelja i mladunčadi – nemogući su bez nekog sistema signalizacije. Određivanje zauzete teritorije, okupljanje grupe, vođstvo i određivanje statusa u grupi, dominacija i potčinjavanje, organizovanje kolonija – to su samo neke od socijalnih funkcija koje poslužuju sistemi komunikacije. -U svim sistemima komunikacije među životinjama upadljivo je najmanje onih poruka koje govore o okolini u kojoj životinja živi; samo kod pojedinih vrsta postoje signali koji saopštavaju o postojanju ili lokalizaciji napadača, o hrani ili nalazištu hrane, o lokaciji staništa. -Kada se ima u vidu samo ono što je ovde pobrojano, stiče se utisak o bogatstvu informacija koje mogu preneti komunikativni sistemi životinja. Međutim, za pun uvid u prirodu tih sistema potrebno je videti na koji način sistemi komuniciranja kod životinja obavljaju te funkcije. Obično se kaže da ovi sistemi imaju, pre svega ili isključivo socijalne funkcije. To je tačno, ali pod uslovom da se prethodno razjasni šta ovde znači – socijalno. Funkcije tih sistema su socijalne najpre su smislu da signali UTIČU NA DRUGE jedinke. I upravo tako, utiču na druge jedinke, a ne upućeni su drugim jedinkama. U stvari, u razvoju odnosa među jedinkama tokom evolucije izgrađuje se svojevrsna socijalna simbioza, u kojoj neki vidljivi pokazatelji ponašanja jedne jedinke postaju obaveštenja (signali) o njenim motivacionim stanjima. Dakle, signali su (za razliku od SIMBOLA) samo pokazatelji i sastavni delovi motivacionih, afektivnih ili nekih drugih unutrašnjih stanja jedinke. I baš zbog toga se ne može reći da jedinka upućuje drugoj signale, ona prosto doživljava to što doživljava. Neke komponente doživljaja dostupne su opažanju drugih jedinki i u toku zajedničkog života postaju signali određenih stanja. Tokom evolucije ti signali se stabilizuju, stilizuju (ritualizuju) i prerađuju u određeni sistem komunikacije. -U tako stvorenoj socijalnoj simbiozi, signali koje upućuju životinje jedne drugima pre su nalozi za izvođenje određenih radnji, tj. pokretači ili inhibitori radnji nego saopštene informacije. Hormonalne i druge promene u organizmu koje dovode do pojave signala u jednoj jedinki skoro automatski u određenim uslovima pokreću lanac hormonalnih i ostalih promena u jedinki koja prima signale, a te promene kod njih izazivaju određene radnje. Dakle, komunikativni sistemi imaju socijalne funkcije zato što menjaju ponašanje drugih jedinki. A to dalje znači da jedina jedinka kojoj životinja ne upućuje signale jeste ona sama, i to je jedna od suštinskih razlika semiotičke komunikacije čoveka i komunikacije među žvotinjama. -Iz prethodno opisanog sledi i ovo: signal retko kada ima isto značenje za jedinku koja ga emituje i jedinku koja ga prima. 
U komunikaciji među životinjama više se radi o odnosima komplementarnosti nego o odnosima recipročnosti (baš kao u nekim neverbalnim vidovima komunikacije kod ljudi: onaj koji pokazuje gnev i onaj ko opaža gnev imaju različite doživljaje). U ovom pogledu signali više služe za socijalnu facilitaciju ponašanja i sinhronizaciju fizioloških stanja i motoričkih radnji u toku interindividualnih aktivnosti (nrp. parenja) ili grupnih aktivnosti (npr. u komunikaciji pčela) nego za socijalnu razmenu. -Potrebno je ukazati na još jednu odliku komunikativnih sistema životinja: postoji uska specijalizacija signala za određene funkcije. Svaka od ranije pobrojanih funkcija ima specifične signale koji joj služe (npr. krici kod ptica su signali opasnosti, a pesma ptica-pevačica je ljubavni zov). Ovo, naravno, ne znači da za obavljanje jedne vrste funkcija postoji samo jedna vrsta signala, jer je i u sistemima veze između životinja pravilo da postoji redundansa (npr. za saopštavanje o mestu nalazišta paše pčele koriste istovremeno i „jezik“ igre telom i akustičke signale). -U pogledu SEMANTIKE (značenja), sistemi komunikacija među životinjama poseduju dva osnovna svojstva: -Postoji konačan i obično veoma mali broj poruka koje stoje svakoj vrsti na raspolaganju – značajnija je karakteristika da je broj signala konačan. Naime, životinja svake vrste dobija nasleđem, ili stiče uz izvesno učenje, određen broj signala i taj repertoar ostaje zatvoren, nepromenljiv. Za razliku od toga, ljudski govor je otvoreni sistem, koji po svojim pravilima stvara nove jedinice sistema. -Jedva da poneki istraživač saopštava da je zapazio stvaranje novih signala kod životinja. Tamo gde je signale moguće tehnički pobrojati, nalazi se da njihov broj nije veliki. Tako kod pojedinih vrsta majmuna istraživači redovno utvrđuju da se broj akustičkih signala kreće od 10-15, dok se kod šimpanzi može razlikovati 9 facijanih ekspresija. Ni kod drugih životinja taj broj nije mnogo veći: kod nekih jedva da postoje dva signala u istom kanalu veze, a jedino se kod nekih ptica-pevačica sreće i do nekoliko stotina različitih „motiva“ u pesmama. Pošto većinu komunikativnih sredstava kojima se služe životinje dobijaju nasleđem, ta nasledna određenost vrlo je striktna i u pogledu funkcije i forme signala, tako da je životinja sposobna za komunikaciju koja je karakteristična za njenu vrstu, čak i kada se razvija u izolaciji. Kod nekih vrsta ptica učenje ima značajniju ulogu. Poznato je da mladi nekih vrsta ptica mogu da nauče i pesmu drugih vrsta ptica, u čijoj zajednici odrastaju, dok kod nekih vrsta ptica određene grupacije jedinki stvaraju svoje „dijalekte“. Izgleda da je funkcija tih dijalekata da iz nekih razloga ograniče parenje među pripadnicima različitih grupacija, jer ptice mogu da se pare samo na osnovu ljubavne pesme onog „dijalekta“ kojim se služe. Ovakav način sticanja komunikativnih sistema veoma podseća na usvajanje govora kod dece. -Signali nemaju denotativna značenja, tj. ne označavaju neki određeni segment realnosti (denotat), ne saopštavaju nešto o tom denotatu, već samo predstavljaju vid ekspresije stanja organizma. Za semiotičku analizu posebno je značajno da li signali koje životinje koriste zaista označavaju nešto različito od sebe samih, da li kazuju nešto o denotatu, ili su puka ekspresija fizioloških i afektivnih stanja životinje. 
\ No newline at end of file diff --git a/cognee/modules/topology/explanations/Natural language processing.txt b/cognee/modules/topology/explanations/Natural language processing.txt deleted file mode 100644 index a6fad3b4..00000000 --- a/cognee/modules/topology/explanations/Natural language processing.txt +++ /dev/null @@ -1,2 +0,0 @@ -Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval. It is primarily concerned with giving computers the ability to support and manipulate human language. It involves processing natural language datasets, such as text corpora or speech corpora, using either rule-based or probabilistic (i.e. statistical and, most recently, neural network-based) machine learning approaches. The goal is a computer capable of "understanding"[citation needed] the contents of documents, including the contextual nuances of the language within them. To this end, natural language processing often borrows ideas from theoretical linguistics. The technology can then accurately extract information and insights contained in the documents as well as categorize and organize the documents themselves. -Challenges in natural language processing frequently involve speech recognition, natural-language understanding, and natural-language generation. diff --git a/cognee/modules/topology/explanations/bab90046-1d9b-598c-8711-dab30f501915.txt b/cognee/modules/topology/explanations/bab90046-1d9b-598c-8711-dab30f501915.txt deleted file mode 100644 index 09d6d68d..00000000 --- a/cognee/modules/topology/explanations/bab90046-1d9b-598c-8711-dab30f501915.txt +++ /dev/null @@ -1 +0,0 @@ -German novels are fun to read and talk about nature \ No newline at end of file diff --git a/cognee/modules/topology/extraction/__init__.py b/cognee/modules/topology/extraction/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/cognee/modules/topology/extraction/extract_topology.py b/cognee/modules/topology/extraction/extract_topology.py deleted file mode 100644 index 6f68cb03..00000000 --- a/cognee/modules/topology/extraction/extract_topology.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import Type -from pydantic import BaseModel -from cognee.infrastructure.llm.prompts import read_query_prompt -from cognee.infrastructure.llm.get_llm_client import get_llm_client - - -async def extract_topology(content: str, response_model: Type[BaseModel]): - llm_client = get_llm_client() - - system_prompt = read_query_prompt("extract_topology.txt") - - llm_output = await llm_client.acreate_structured_output(content, system_prompt, response_model) - - return llm_output.model_dump() diff --git a/cognee/modules/topology/infer_data_topology.py b/cognee/modules/topology/infer_data_topology.py deleted file mode 100644 index ada7ea49..00000000 --- a/cognee/modules/topology/infer_data_topology.py +++ /dev/null @@ -1,20 +0,0 @@ -import logging -from cognee.modules.topology.extraction.extract_topology import extract_topology -from cognee.infrastructure.databases.graph.config import get_graph_config - -logger = logging.getLogger(__name__) - -async def infer_data_topology(content: str, graph_topology=None): - if graph_topology is None: - graph_config = get_graph_config() - graph_topology = graph_config.graph_topology - - print("content: ", type(content)) - try: - return (await extract_topology( - content, - graph_topology - )) - except Exception as error: - logger.error("Error extracting topology from content: %s", error, exc_info = True) - raise error diff --git 
a/cognee/modules/classification/__init__.py b/cognee/tasks/document_to_ontology/__init__.py
similarity index 100%
rename from cognee/modules/classification/__init__.py
rename to cognee/tasks/document_to_ontology/__init__.py
diff --git a/cognee/modules/topology/topology.py b/cognee/tasks/document_to_ontology/document_to_ontology.py
similarity index 74%
rename from cognee/modules/topology/topology.py
rename to cognee/tasks/document_to_ontology/document_to_ontology.py
index 9f8c82be..314d3b49 100644
--- a/cognee/modules/topology/topology.py
+++ b/cognee/tasks/document_to_ontology/document_to_ontology.py
@@ -1,5 +1,7 @@
 """ This module contains the TopologyEngine class which is responsible for adding graph topology from a JSON or CSV file. """
+from cognee.infrastructure.databases.graph import get_graph_config
+from cognee.modules.cognify.config import get_cognify_config
 import csv
 import json
 import logging
@@ -14,10 +16,42 @@
 from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
 from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file
 from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException
-from cognee.modules.topology.topology_data_models import NodeModel
+from cognee.tasks.document_to_ontology.models.models import NodeModel
 
 logger = logging.getLogger("topology")
 
+from cognee.infrastructure.databases.graph.config import get_graph_config
+
+
+from typing import Type
+from pydantic import BaseModel
+from cognee.infrastructure.llm.prompts import read_query_prompt
+from cognee.infrastructure.llm.get_llm_client import get_llm_client
+
+
+async def extract_topology(content: str, response_model: Type[BaseModel]):
+    llm_client = get_llm_client()
+
+    system_prompt = read_query_prompt("extract_topology.txt")
+
+    llm_output = await llm_client.acreate_structured_output(content, system_prompt, response_model)
+
+    return llm_output.model_dump()
+
+
+async def infer_data_topology(content: str, graph_topology=None):
+    if graph_topology is None:
+        graph_config = get_graph_config()
+        graph_topology = graph_config.graph_topology
+    try:
+        return (await extract_topology(
+            content,
+            graph_topology
+        ))
+    except Exception as error:
+        logger.error("Error extracting topology from content: %s", error, exc_info = True)
+        raise error
+
 class TopologyEngine:
     def __init__(self, infer:bool) -> None:
         self.models: Dict[str, Type[BaseModel]] = {}
@@ -69,8 +103,6 @@ async def load_data(self, file_path: str) -> Union[List[Dict[str, Any]], Dict[st
     async def add_graph_topology(self, file_path: str = None, files: list = None):
         """Add graph topology from a JSON or CSV file."""
         if self.infer:
-            from cognee.modules.topology.infer_data_topology import infer_data_topology
-
             initial_chunks_and_ids = []
 
             chunk_config = get_chunk_config()
@@ -128,3 +160,23 @@ async def add_graph_topology(self, file_path: str = None, files: list = None):
             return
         except Exception as e:
             raise RuntimeError(f"Failed to add graph topology from {file_path}: {e}") from e
+
+
+
+
+
+async def document_to_ontology(data, root_node_id):
+    cognee_config = get_cognify_config()
+    graph_config = get_graph_config()
+    root_node_id = None
+    if graph_config.infer_graph_topology and graph_config.graph_topology_task:
+        topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology)
+        root_node_id = await topology_engine.add_graph_topology(files=data)
+    elif graph_config.graph_topology_task and not graph_config.infer_graph_topology:
+        # Condition assumed: use the provided topology file when inference is disabled.
+        topology_engine = 
TopologyEngine(infer=graph_config.infer_graph_topology) + await topology_engine.add_graph_topology(graph_config.topology_file_path) + elif not graph_config.graph_topology_task: + root_node_id = "ROOT" + + yield (data, root_node_id) \ No newline at end of file diff --git a/cognee/modules/topology/topology_data_models.py b/cognee/tasks/document_to_ontology/models/models.py similarity index 100% rename from cognee/modules/topology/topology_data_models.py rename to cognee/tasks/document_to_ontology/models/models.py diff --git a/docs/index.md b/docs/index.md index 068b319d..4755a8e7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,58 +1,381 @@ # cognee + #### Deterministic LLMs Outputs for AI Engineers + _Open-source framework for loading and structuring LLM context to create accurate and explainable AI solutions using knowledge graphs and vector stores_ + --- + [![Twitter Follow](https://img.shields.io/twitter/follow/tricalt?style=social)](https://twitter.com/tricalt) + [![Downloads](https://img.shields.io/pypi/dm/cognee.svg)](https://pypi.python.org/pypi/cognee) + [![Star on GitHub](https://img.shields.io/github/stars/topoteretes/cognee.svg?style=social)](https://github.com/topoteretes/cognee) + ### Let's learn about cogneeHub! + cogneeHub is a free, open-source learning platform for those interested in creating deterministic LLM outputs. We help developers by using graphs, LLMs, and adding vector retrieval to their Machine Learning stack. + - **Get started** — [Get started with cognee quickly and try it out for yourself.](quickstart.md) + - **Conceptual Overview** — Learn about the [core concepts](conceptual_overview.md) of cognee and how it fits into your projects. + - **Data Engineering and LLMOps** — Learn about some [data engineering and llmops](data_engineering_llm_ops.md) core concepts that will help you build better AI apps. + - **RAGs** — We provide easy-to-follow [learning materials](rags.md) to help you learn about RAGs. + - **Research** — A list of resources to help you learn more about [cognee and LLM memory research](research.md) + - **Blog** — A blog where you can read about the [latest news and updates](blog/index.md) about cognee. + - **Support** — [Book time](https://www.cognee.ai/#bookTime) with our team. -[//]: # (- **Case Studies** — Read about [case studies](case_studies.md) that show how cognee can be used in real-world applications.) + +[//]: # (- **Case Studies** — Read about [case studies](case_studies.md) that show how cognee can be used in real-world applications.) ### Vision + ![Vision](img/roadmap.png) + ### Architecture + ![Architecture](img/architecture.png) + ### Why use cognee? + The question of using cognee is fundamentally a question of why to have deterministic outputs for your llm workflows. + 1. **Cost-effective** — cognee extends the capabilities of your LLMs without the need for expensive data processing tools. + 2. **Self-contained** — cognee runs as a library and is simple to use + 3. **Interpretable** — Navigate graphs instead of embeddings to understand your data. + 4. **User Guided** — cognee lets you control your input and provide your own Pydantic data models + ## License + This project is licensed under the terms of the Apache License 2.0. + + +[//]: # () + +[//]: # () +[//]: # () +[//]: # (# New to cognee?) + +[//]: # () +[//]: # () +[//]: # (The getting started guide covers adding a GraphRAG data store to your AI app, sending events, identifying users, extracting actions and insights, and interconnecting separate datasets.) 
) From 156c7bec689569d39db96950cb601b17d01ba90d Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 8 Aug 2024 13:47:03 +0200 Subject: [PATCH 09/17] Refactor of the tasks --- .../source_documents_to_chunks/source_documents_to_chunks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py b/cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py index 116f020a..419200e0 100644 --- a/cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py +++ b/cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py @@ -5,6 +5,10 @@ async def source_documents_to_chunks(documents: list[Document], parent_node_id: str = None, user:str=None, user_permissions:str=None): graph_engine = await get_graph_engine() + if parent_node_id is None: + documents, parent_node_id = documents + + nodes = [] edges = [] From 1c9bbd7a43114cdc34497e62bfd9ee68b67551c5 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:08:19 +0200 Subject: [PATCH 10/17] Refactor of the tasks --- cognee/api/v1/cognify/cognify_v2.py | 12 ------------ .../relational/sqlalchemy/SqlAlchemyAdapter.py | 2 ++ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 497815dd..616c710c 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -89,18 +89,6 @@ async def run_cognify_pipeline(dataset: Dataset): cognee_config = get_cognify_config() graph_config = get_graph_config() root_node_id = None - # - # if graph_config.infer_graph_topology and graph_config.graph_topology_task: - # from cognee.modules.topology.topology import TopologyEngine - # topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) - # root_node_id = await topology_engine.add_graph_topology(files = data) - # elif graph_config.infer_graph_topology and not graph_config.infer_graph_topology: - # from cognee.modules.topology.topology import TopologyEngine - # topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology) - # await topology_engine.add_graph_topology(graph_config.topology_file_path) - # elif not graph_config.graph_topology_task: - # root_node_id = "ROOT" - tasks = [ Task(document_to_ontology, root_node_id = root_node_id), Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index 0bec5cd9..c52ad92c 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -70,11 +70,13 @@ async def create_table(self, schema_name: str, table_name: str, table_config: li async def delete_table(self, table_name: str): async with self.engine.begin() as connection: await connection.execute(text(f"DROP TABLE IF EXISTS {table_name} CASCADE;")) + await connection.close() async def insert_data(self, schema_name: str, table_name: str, data: list[dict]): columns = ", ".join(data[0].keys()) values = ", ".join([f"({', '.join([f':{key}' for key in row.keys()])})" for row in data]) insert_query = text(f"INSERT INTO {schema_name}.{table_name} ({columns}) VALUES 
{values};") + async with self.engine.begin() as connection: await connection.execute(insert_query, data) await connection.close() From 4675a8f3231a797182c74272e149c7a2ab5e0265 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:10:43 +0200 Subject: [PATCH 11/17] Refactor of the tasks --- cognee/api/v1/cognify/cognify_v2.py | 14 +++++++------- .../chunk_extract_summary/chunk_extract_summary.py | 2 +- .../chunk_naive_llm_classifier.py | 2 +- .../chunk_remove_disconnected.py | 2 +- .../chunk_to_graph_decomposition.py | 2 +- .../tasks/chunk_update_check/chunk_update_check.py | 2 +- .../tasks/chunks_into_graph/chunks_into_graph.py | 2 +- .../save_chunks_to_store/save_chunks_to_store.py | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 616c710c..7a187600 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -92,25 +92,25 @@ async def run_cognify_pipeline(dataset: Dataset): tasks = [ Task(document_to_ontology, root_node_id = root_node_id), Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type - Task(chunk_to_graph_decomposition_task, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data - Task(chunks_into_graph_task, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes - Task(chunk_update_check_task, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks + Task(chunk_to_graph_decomposition, topology_model = KnowledgeGraph, task_config = { "batch_size": 10 }), # Set the graph topology for the document chunk data + Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = "entities"), # Generate knowledge graphs from the document chunks and attach it to chunk nodes + Task(chunk_update_check, collection_name = "chunks"), # Find all affected chunks, so we don't process unchanged chunks Task( - save_chunks_to_store_task, + save_chunks_to_store, collection_name = "chunks", ), # Save the document chunks in vector db and as nodes in graph db (connected to the document node and between each other) run_tasks_parallel([ Task( - chunk_extract_summary_task, + chunk_extract_summary, summarization_model = cognee_config.summarization_model, collection_name = "chunk_summaries", ), # Summarize the document chunks Task( - chunk_naive_llm_classifier_task, + chunk_naive_llm_classifier, classification_model = cognee_config.classification_model, ), ]), - Task(chunk_remove_disconnected_task), # Remove the obsolete document chunks. + Task(chunk_remove_disconnected), # Remove the obsolete document chunks. 
] pipeline = run_tasks(tasks, documents) diff --git a/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py b/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py index 8ebd5d5c..01387b2c 100644 --- a/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py +++ b/cognee/tasks/chunk_extract_summary/chunk_extract_summary.py @@ -8,7 +8,7 @@ from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk -async def chunk_extract_summary_task(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel], collection_name: str = "summaries"): +async def chunk_extract_summary(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel], collection_name: str = "summaries"): if len(data_chunks) == 0: return data_chunks diff --git a/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py b/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py index 4e75f87e..6db89c10 100644 --- a/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +++ b/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py @@ -8,7 +8,7 @@ from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk -async def chunk_naive_llm_classifier_task(data_chunks: list[DocumentChunk], classification_model: Type[BaseModel]): +async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classification_model: Type[BaseModel]): if len(data_chunks) == 0: return data_chunks diff --git a/cognee/tasks/chunk_remove_disconnected/chunk_remove_disconnected.py b/cognee/tasks/chunk_remove_disconnected/chunk_remove_disconnected.py index 0f046987..3ab59a65 100644 --- a/cognee/tasks/chunk_remove_disconnected/chunk_remove_disconnected.py +++ b/cognee/tasks/chunk_remove_disconnected/chunk_remove_disconnected.py @@ -6,7 +6,7 @@ # from cognee.infrastructure.databases.vector import get_vector_engine -async def chunk_remove_disconnected_task(data_chunks: list[DocumentChunk]) -> list[DocumentChunk]: +async def chunk_remove_disconnected(data_chunks: list[DocumentChunk]) -> list[DocumentChunk]: graph_engine = await get_graph_engine() document_ids = set((data_chunk.document_id for data_chunk in data_chunks)) diff --git a/cognee/tasks/chunk_to_graph_decomposition/chunk_to_graph_decomposition.py b/cognee/tasks/chunk_to_graph_decomposition/chunk_to_graph_decomposition.py index 6f9c936d..2e2c3b17 100644 --- a/cognee/tasks/chunk_to_graph_decomposition/chunk_to_graph_decomposition.py +++ b/cognee/tasks/chunk_to_graph_decomposition/chunk_to_graph_decomposition.py @@ -7,7 +7,7 @@ from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph -async def chunk_to_graph_decomposition_task(data_chunks: list[DocumentChunk], topology_model: Type[BaseModel]): +async def chunk_to_graph_decomposition(data_chunks: list[DocumentChunk], topology_model: Type[BaseModel]): if topology_model == KnowledgeGraph: return data_chunks diff --git a/cognee/tasks/chunk_update_check/chunk_update_check.py b/cognee/tasks/chunk_update_check/chunk_update_check.py index cd532b68..2bd05241 100644 --- a/cognee/tasks/chunk_update_check/chunk_update_check.py +++ b/cognee/tasks/chunk_update_check/chunk_update_check.py @@ -2,7 +2,7 @@ from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk -async def chunk_update_check_task(data_chunks: list[DocumentChunk], collection_name: str) -> list[DocumentChunk]: +async def chunk_update_check(data_chunks: list[DocumentChunk], collection_name: str) -> 
list[DocumentChunk]: vector_engine = get_vector_engine() if not await vector_engine.has_collection(collection_name): diff --git a/cognee/tasks/chunks_into_graph/chunks_into_graph.py b/cognee/tasks/chunks_into_graph/chunks_into_graph.py index 40747ffb..c4cbfcdc 100644 --- a/cognee/tasks/chunks_into_graph/chunks_into_graph.py +++ b/cognee/tasks/chunks_into_graph/chunks_into_graph.py @@ -18,7 +18,7 @@ class EntityNode(BaseModel): created_at: datetime updated_at: datetime -async def chunks_into_graph_task(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel], collection_name: str): +async def chunks_into_graph(data_chunks: list[DocumentChunk], graph_model: Type[BaseModel], collection_name: str): chunk_graphs = await asyncio.gather( *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks] ) diff --git a/cognee/tasks/save_chunks_to_store/save_chunks_to_store.py b/cognee/tasks/save_chunks_to_store/save_chunks_to_store.py index 6ae4fb89..710dce55 100644 --- a/cognee/tasks/save_chunks_to_store/save_chunks_to_store.py +++ b/cognee/tasks/save_chunks_to_store/save_chunks_to_store.py @@ -2,7 +2,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.data.processing.chunk_types.DocumentChunk import DocumentChunk -async def save_chunks_to_store_task(data_chunks: list[DocumentChunk], collection_name: str): +async def save_chunks_to_store(data_chunks: list[DocumentChunk], collection_name: str): if len(data_chunks) == 0: return data_chunks From 64bec1f1a450998df17f14c334578fc586399be2 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:11:28 +0200 Subject: [PATCH 12/17] Refactor of the tasks --- cognee/api/v1/cognify/cognify_v2.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 7a187600..00f5bc3c 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -19,15 +19,15 @@ from cognee.modules.users.permissions.methods import check_permissions_on_documents from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status -from cognee.tasks.chunk_extract_summary.chunk_extract_summary import chunk_extract_summary_task -from cognee.tasks.chunk_naive_llm_classifier.chunk_naive_llm_classifier import chunk_naive_llm_classifier_task -from cognee.tasks.chunk_remove_disconnected.chunk_remove_disconnected import chunk_remove_disconnected_task -from cognee.tasks.chunk_to_graph_decomposition.chunk_to_graph_decomposition import chunk_to_graph_decomposition_task +from cognee.tasks.chunk_extract_summary.chunk_extract_summary import chunk_extract_summary +from cognee.tasks.chunk_naive_llm_classifier.chunk_naive_llm_classifier import chunk_naive_llm_classifier +from cognee.tasks.chunk_remove_disconnected.chunk_remove_disconnected import chunk_remove_disconnected +from cognee.tasks.chunk_to_graph_decomposition.chunk_to_graph_decomposition import chunk_to_graph_decomposition from cognee.tasks.document_to_ontology.document_to_ontology import document_to_ontology -from cognee.tasks.save_chunks_to_store.save_chunks_to_store import save_chunks_to_store_task -from cognee.tasks.chunk_update_check.chunk_update_check import chunk_update_check_task +from cognee.tasks.save_chunks_to_store.save_chunks_to_store import save_chunks_to_store +from 
cognee.tasks.chunk_update_check.chunk_update_check import chunk_update_check from cognee.tasks.chunks_into_graph.chunks_into_graph import \ - chunks_into_graph_task + chunks_into_graph from cognee.tasks.source_documents_to_chunks.source_documents_to_chunks import source_documents_to_chunks logger = logging.getLogger("cognify.v2") From 00cd503ee3e1c30ae6c52cdeb624844892f80ec6 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:30:46 +0200 Subject: [PATCH 13/17] fix poetry --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 53c5681b..335d6b8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,7 +83,6 @@ weaviate = ["weaviate-client"] qdrant = ["qdrant-client"] neo4j = ["neo4j", "py2neo"] notebook = ["ipykernel","overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"] -langchain = ["langfuse"] poetry install cognee --extras=langchain From d5024cbe883a9cc7833a6ff1edf8771eb8e2e290 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:31:39 +0200 Subject: [PATCH 14/17] fix poetry --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 335d6b8d..489348fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,9 +84,6 @@ qdrant = ["qdrant-client"] neo4j = ["neo4j", "py2neo"] notebook = ["ipykernel","overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"] - -poetry install cognee --extras=langchain - [tool.poetry.group.dev.dependencies] pytest = "^7.4.0" pytest-asyncio = "^0.21.1" From 3d1afe74edad3d73fce248fcdddb26634bb957bd Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:53:30 +0200 Subject: [PATCH 15/17] fix poetry --- .github/workflows/test_common.yml | 84 +++---------------------------- 1 file changed, 8 insertions(+), 76 deletions(-) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index b28fd977..8165b339 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -24,21 +24,8 @@ jobs: strategy: fail-fast: false matrix: - # os: ["ubuntu-latest", "macos-latest"] os: ["ubuntu-latest"] steps: - # - name: Install Docker - # run: | - # HOMEBREW_NO_AUTO_UPDATE=1 brew install --cask docker - # sudo /Applications/Docker.app/Contents/MacOS/Docker --unattended --install-privileged-components - # open -a /Applications/Docker.app --args --unattended --accept-license - # echo "We are waiting for Docker to be up and running. It can take over 2 minutes..." - # while ! 
/Applications/Docker.app/Contents/Resources/bin/docker info &>/dev/null; do sleep 1; done - # if: runner.os == 'macOS' - - # - name: Set up QEMU - # uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -49,16 +36,13 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-latest"] #, "windows-latest", "macos-latest" + os: ["ubuntu-latest"] python-version: ["3.11.x"] - # Test all python versions on ubuntu only include: - python-version: "3.9.x" os: "ubuntu-22.04" - python-version: "3.10.x" os: "ubuntu-22.04" -# - python-version: "3.12.x" -# os: "ubuntu-latest" defaults: run: @@ -75,6 +59,11 @@ jobs: - postgres_data:/var/lib/postgresql/data ports: - 5432:5432 + options: >- + --health-cmd="pg_isready -U $POSTGRES_USER" + --health-interval=10s + --health-timeout=5s + --health-retries=5 runs-on: ${{ matrix.os }} @@ -100,7 +89,6 @@ jobs: shell: cmd - name: Install Poetry - # https://github.com/snok/install-poetry#running-on-windows uses: snok/install-poetry@v1.3.2 with: virtualenvs-create: true @@ -118,7 +106,7 @@ jobs: - name: Wait for PostgreSQL to be ready run: | echo "Waiting for PostgreSQL to be ready..." - until pg_isready -h localhost -p 5432 -U cognee; do + until docker exec ${RUNNER_USER_NAME}-postgres pg_isready -U cognee; do sleep 1 done @@ -155,62 +143,6 @@ jobs: cd dist pip install *.whl - # - run: | - # poetry run - # if: runner.os != 'Windows' - # name: Run common tests with minimum dependencies Linux/MAC - # env: - # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - # QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} - # QDRANT_API_URL: ${{ secrets.QDRANT_API_URL }} - - # - run: | - # poetry run python ./cognee/tests/test_library.py - # if: runner.os == 'Windows' - # name: Run common tests with minimum dependencies Windows - # env: - # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - # QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} - # QDRANT_API_URL: ${{ secrets.QDRANT_API_URL }} - # shell: cmd - - # - name: Install dependencies - # run: poetry install --no-interaction - - # - name: Build with Poetry - # run: poetry build - - # - name: Install Package - # run: | - # cd dist - # pip install *.whl - - # - name: Download NLTK Punkt Tokenizer Models - # run: | - # python -m nltk.downloader punkt - # - run: | - # poetry run python ./cognee/tests/test_library.py - # if: runner.os != 'Windows' - # name: Run pipeline smoke tests with minimum deps Linux/MAC - # env: - # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - # QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} - # QDRANT_API_URL: ${{ secrets.QDRANT_API_URL }} - - # - name: Download NLTK Punkt Tokenizer Models - # run: | - # python -m nltk.downloader punkt - # - run: | - # poetry run python ./cognee/tests/test_library.py - # if: runner.os == 'Windows' - # name: Run smoke tests with minimum deps Windows - # env: - # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - # QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} - # QDRANT_API_URL: ${{ secrets.QDRANT_API_URL }} - # shell: cmd - - matrix_job_required_check: name: common | common tests needs: run_common @@ -220,4 +152,4 @@ jobs: - name: Check matrix job results if: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') run: | - echo "One or more matrix job tests failed or were cancelled. You may need to re-run them." && exit 1 \ No newline at end of file + echo "One or more matrix job tests failed or were cancelled. You may need to re-run them." 
&& exit 1 From c76652a36889f37b0cd6846c3f982381453d4beb Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 8 Aug 2024 17:57:33 +0200 Subject: [PATCH 16/17] fix poetry --- .github/workflows/test_common.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 8165b339..795b6629 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -106,7 +106,7 @@ jobs: - name: Wait for PostgreSQL to be ready run: | echo "Waiting for PostgreSQL to be ready..." - until docker exec ${RUNNER_USER_NAME}-postgres pg_isready -U cognee; do + until docker run --rm --network container:${{ services.postgres.id }} postgres:latest pg_isready -U cognee; do sleep 1 done From 7f9a1031eedc8c76df80d6707665e2cba6a7301b Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 8 Aug 2024 18:07:36 +0200 Subject: [PATCH 17/17] fix tests --- .github/workflows/test_common.yml | 72 +++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 795b6629..f3b632df 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -24,6 +24,7 @@ jobs: strategy: fail-fast: false matrix: + # os: ["ubuntu-latest", "macos-latest"] os: ["ubuntu-latest"] steps: - name: Set up Docker Buildx @@ -36,13 +37,16 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-latest"] + os: ["ubuntu-latest"] #, "windows-latest", "macos-latest" python-version: ["3.11.x"] + # Test all python versions on ubuntu only include: - python-version: "3.9.x" os: "ubuntu-22.04" - python-version: "3.10.x" os: "ubuntu-22.04" +# - python-version: "3.12.x" +# os: "ubuntu-latest" defaults: run: @@ -59,11 +63,6 @@ jobs: - postgres_data:/var/lib/postgresql/data ports: - 5432:5432 - options: >- - --health-cmd="pg_isready -U $POSTGRES_USER" - --health-interval=10s - --health-timeout=5s - --health-retries=5 runs-on: ${{ matrix.os }} @@ -89,6 +88,7 @@ jobs: shell: cmd - name: Install Poetry + # https://github.com/snok/install-poetry#running-on-windows uses: snok/install-poetry@v1.3.2 with: virtualenvs-create: true @@ -106,7 +106,7 @@ jobs: - name: Wait for PostgreSQL to be ready run: | echo "Waiting for PostgreSQL to be ready..." 
- until docker run --rm --network container:${{ services.postgres.id }} postgres:latest pg_isready -U cognee; do + until pg_isready -h localhost -p 5432 -U cognee; do sleep 1 done @@ -143,6 +143,62 @@ jobs: cd dist pip install *.whl + # - run: | + # poetry run + # if: runner.os != 'Windows' + # name: Run common tests with minimum dependencies Linux/MAC + # env: + # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + # QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} + # QDRANT_API_URL: ${{ secrets.QDRANT_API_URL }} + + # - run: | + # poetry run python ./cognee/tests/test_library.py + # if: runner.os == 'Windows' + # name: Run common tests with minimum dependencies Windows + # env: + # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + # QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} + # QDRANT_API_URL: ${{ secrets.QDRANT_API_URL }} + # shell: cmd + + # - name: Install dependencies + # run: poetry install --no-interaction + + # - name: Build with Poetry + # run: poetry build + + # - name: Install Package + # run: | + # cd dist + # pip install *.whl + + # - name: Download NLTK Punkt Tokenizer Models + # run: | + # python -m nltk.downloader punkt + # - run: | + # poetry run python ./cognee/tests/test_library.py + # if: runner.os != 'Windows' + # name: Run pipeline smoke tests with minimum deps Linux/MAC + # env: + # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + # QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} + # QDRANT_API_URL: ${{ secrets.QDRANT_API_URL }} + + # - name: Download NLTK Punkt Tokenizer Models + # run: | + # python -m nltk.downloader punkt + # - run: | + # poetry run python ./cognee/tests/test_library.py + # if: runner.os == 'Windows' + # name: Run smoke tests with minimum deps Windows + # env: + # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + # QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} + # QDRANT_API_URL: ${{ secrets.QDRANT_API_URL }} + # shell: cmd + + matrix_job_required_check: name: common | common tests needs: run_common @@ -152,4 +208,4 @@ jobs: - name: Check matrix job results if: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') run: | - echo "One or more matrix job tests failed or were cancelled. You may need to re-run them." && exit 1 + echo "One or more matrix job tests failed or were cancelled. You may need to re-run them." && exit 1 \ No newline at end of file
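For readers tracing the task refactor in the patches above: `document_to_ontology` yields a `(data, root_node_id)` tuple, and the updated `source_documents_to_chunks` unpacks that tuple when no explicit `parent_node_id` is supplied. Below is a minimal, self-contained sketch of that handoff convention in plain Python, with no cognee imports; the stand-in functions only mirror the names used in the patches and are not the real implementations.

import asyncio

async def document_to_ontology(data, root_node_id=None):
    # Stand-in for cognee/tasks/document_to_ontology: when no topology task is
    # configured, the patch falls back to a "ROOT" node and yields a tuple.
    root_node_id = root_node_id or "ROOT"
    yield (data, root_node_id)

async def source_documents_to_chunks(documents, parent_node_id=None):
    # Mirrors the guard added in the "Refactor of the tasks" commit: if the
    # previous task handed over a (documents, parent_node_id) tuple, unpack it.
    if parent_node_id is None:
        documents, parent_node_id = documents
    return [f"{doc} -> chunk under {parent_node_id}" for doc in documents]

async def main():
    docs = ["document_a", "document_b"]
    async for handoff in document_to_ontology(docs):
        print(await source_documents_to_chunks(handoff))

asyncio.run(main())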