diff --git a/cognee/tasks/infer_data_ontology/infer_data_ontology.py b/cognee/tasks/infer_data_ontology/infer_data_ontology.py index bbf6b2e7c..6415eb005 100644 --- a/cognee/tasks/infer_data_ontology/infer_data_ontology.py +++ b/cognee/tasks/infer_data_ontology/infer_data_ontology.py @@ -89,7 +89,7 @@ async def add_graph_ontology(self, file_path: str = None, documents: list = None chunk_strategy = chunk_config.chunk_strategy for base_file in documents: - with open(base_file.file_path, "rb") as file: + with open(base_file.raw_data_location, "rb") as file: try: file_type = guess_file_type(file) text = extract_text_from_file(file, file_type) @@ -175,7 +175,7 @@ async def infer_data_ontology(documents, ontology_model = KnowledgeGraph, root_n ontology_engine = OntologyEngine() root_node_id = await ontology_engine.add_graph_ontology(documents = documents) else: - graph_engine = get_graph_engine() + graph_engine = await get_graph_engine() await add_model_class_to_graph(ontology_model, graph_engine) yield (documents, root_node_id) diff --git a/notebooks/cognee_demo_1.5.ipynb b/notebooks/cognee_demo_1.5.ipynb index 0b43ae2e6..4cf72171b 100644 --- a/notebooks/cognee_demo_1.5.ipynb +++ b/notebooks/cognee_demo_1.5.ipynb @@ -3,8 +3,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:11:39.250021Z", - "start_time": "2024-09-20T05:11:30.139927Z" + "end_time": "2024-09-20T14:02:47.336283Z", + "start_time": "2024-09-20T14:02:43.652444Z" } }, "cell_type": "code", @@ -46,8 +46,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:11:44.669755Z", - "start_time": "2024-09-20T05:11:44.666218Z" + "end_time": "2024-09-20T14:02:48.519686Z", + "start_time": "2024-09-20T14:02:48.515589Z" } }, "cell_type": "code", @@ -80,13 +80,13 @@ ], "id": "df16431d0f48b006", "outputs": [], - "execution_count": 3 + "execution_count": 2 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:11:45.277312Z", - "start_time": "2024-09-20T05:11:45.272644Z" + "end_time": "2024-09-20T14:02:49.120838Z", + "start_time": "2024-09-20T14:02:49.118294Z" } }, "cell_type": "code", @@ -125,13 +125,13 @@ ], "id": "9086abf3af077ab4", "outputs": [], - "execution_count": 4 + "execution_count": 3 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:11:45.753246Z", - "start_time": "2024-09-20T05:11:45.751035Z" + "end_time": "2024-09-20T14:02:49.675003Z", + "start_time": "2024-09-20T14:02:49.671615Z" } }, "cell_type": "code", @@ -169,13 +169,13 @@ ], "id": "a9de0cc07f798b7f", "outputs": [], - "execution_count": 5 + "execution_count": 4 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:11:46.238147Z", - "start_time": "2024-09-20T05:11:46.235463Z" + "end_time": "2024-09-20T14:02:50.286828Z", + "start_time": "2024-09-20T14:02:50.284369Z" } }, "cell_type": "code", @@ -213,13 +213,13 @@ ], "id": "185ff1c102d06111", "outputs": [], - "execution_count": 6 + "execution_count": 5 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:11:46.706952Z", - "start_time": "2024-09-20T05:11:46.705079Z" + "end_time": "2024-09-20T14:02:50.950343Z", + "start_time": "2024-09-20T14:02:50.946378Z" } }, "cell_type": "code", @@ -255,13 +255,13 @@ ], "id": "d55ce4c58f8efb67", "outputs": [], - "execution_count": 7 + "execution_count": 6 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:11:47.329735Z", - "start_time": "2024-09-20T05:11:47.326711Z" + "end_time": "2024-09-20T14:02:51.548191Z", + "start_time": "2024-09-20T14:02:51.545520Z" } }, "cell_type": "code", @@ -297,13 +297,13 @@ ], "id": "ca4ecc32721ad332", "outputs": [], - "execution_count": 8 + "execution_count": 7 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:11:50.271409Z", - "start_time": "2024-09-20T05:11:48.529182Z" + "end_time": "2024-09-20T14:02:54.243987Z", + "start_time": "2024-09-20T14:02:52.498195Z" } }, "cell_type": "code", @@ -325,15 +325,15 @@ "output_type": "stream", "text": [ "sys:1: SAWarning: TypeDecorator UUID() will not produce a cache key because the ``cache_ok`` attribute is not set to True. This can have significant performance implications including some performance degradations in comparison to prior SQLAlchemy versions. Set this attribute to True if this type object's state is safe to use in a cache key, or False to disable this warning. (Background on this warning at: https://sqlalche.me/e/20/cprf)\n", - "2024-09-20 07:11:48,925|[WARNING]|43272|8480345088|dlt|utils.py|resolve_merge_strategy:223|Destination does not support any merge strategies and `merge` write disposition for table `file_metadata` cannot be met and will fall back to `append`. Change write disposition.2024-09-20 07:11:49,246|[WARNING]|43272|8480345088|dlt|utils.py|resolve_merge_strategy:223|Destination does not support any merge strategies and `merge` write disposition for table `file_metadata` cannot be met and will fall back to `append`. Change write disposition." + "2024-09-20 16:02:52,880|[WARNING]|53377|8480345088|dlt|utils.py|resolve_merge_strategy:223|Destination does not support any merge strategies and `merge` write disposition for table `file_metadata` cannot be met and will fall back to `append`. Change write disposition.2024-09-20 16:02:53,216|[WARNING]|53377|8480345088|dlt|utils.py|resolve_merge_strategy:223|Destination does not support any merge strategies and `merge` write disposition for table `file_metadata` cannot be met and will fall back to `append`. Change write disposition." ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "{'1726809108.767254': [{'started_at': DateTime(2024, 9, 20, 5, 11, 49, 245902, tzinfo=Timezone('UTC')), 'finished_at': DateTime(2024, 9, 20, 5, 11, 50, 268147, tzinfo=Timezone('UTC')), 'job_metrics': {'_dlt_pipeline_state.a91de88b58.typed-jsonl': LoadJobMetrics(job_id='_dlt_pipeline_state.a91de88b58.typed-jsonl', file_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/normalized/1726809108.767254/started_jobs/_dlt_pipeline_state.a91de88b58.0.typed-jsonl', table_name='_dlt_pipeline_state', started_at=DateTime(2024, 9, 20, 5, 11, 49, 256768, tzinfo=Timezone('UTC')), finished_at=DateTime(2024, 9, 20, 5, 11, 49, 258007, tzinfo=Timezone('UTC')), state='completed', remote_url=None), 'file_metadata.a639414384.typed-jsonl': LoadJobMetrics(job_id='file_metadata.a639414384.typed-jsonl', file_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/normalized/1726809108.767254/started_jobs/file_metadata.a639414384.0.typed-jsonl', table_name='file_metadata', started_at=DateTime(2024, 9, 20, 5, 11, 49, 257362, tzinfo=Timezone('UTC')), finished_at=DateTime(2024, 9, 20, 5, 11, 49, 259366, tzinfo=Timezone('UTC')), state='completed', remote_url=None)}}]}\n", + "\n", + "{'1726840972.7469912': [{'started_at': DateTime(2024, 9, 20, 14, 2, 53, 214773, tzinfo=Timezone('UTC')), 'finished_at': DateTime(2024, 9, 20, 14, 2, 54, 239978, tzinfo=Timezone('UTC')), 'job_metrics': {'file_metadata.a5a9170f4c.typed-jsonl': LoadJobMetrics(job_id='file_metadata.a5a9170f4c.typed-jsonl', file_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/normalized/1726840972.7469912/started_jobs/file_metadata.a5a9170f4c.0.typed-jsonl', table_name='file_metadata', started_at=DateTime(2024, 9, 20, 14, 2, 53, 220755, tzinfo=Timezone('UTC')), finished_at=DateTime(2024, 9, 20, 14, 2, 53, 222174, tzinfo=Timezone('UTC')), state='completed', remote_url=None)}}]}\n", "dlt.destinations.sqlalchemy\n", "sqlite:///cognee_db\n", "sqlalchemy\n", @@ -343,108 +343,69 @@ "None\n", "\n", "example\n", - "['1726809108.767254']\n", - "[LoadPackageInfo(load_id='1726809108.767254', package_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/loaded/1726809108.767254', state='loaded', schema=Schema file_load_from_filesystem at 6016359056, schema_update={'_dlt_loads': {'name': '_dlt_loads', 'columns': {'load_id': {'name': 'load_id', 'data_type': 'text', 'nullable': False}, 'schema_name': {'name': 'schema_name', 'data_type': 'text', 'nullable': True}, 'status': {'name': 'status', 'data_type': 'bigint', 'nullable': False}, 'inserted_at': {'name': 'inserted_at', 'data_type': 'timestamp', 'nullable': False}, 'schema_version_hash': {'name': 'schema_version_hash', 'data_type': 'text', 'nullable': True}}, 'write_disposition': 'skip', 'resource': '_dlt_loads', 'description': 'Created by DLT. Tracks completed loads'}, 'file_metadata': {'columns': {'id': {'name': 'id', 'nullable': False, 'merge_key': True, 'data_type': 'text'}, 'name': {'name': 'name', 'data_type': 'text', 'nullable': True}, 'file_path': {'name': 'file_path', 'data_type': 'text', 'nullable': True}, 'extension': {'name': 'extension', 'data_type': 'text', 'nullable': True}, 'mime_type': {'name': 'mime_type', 'data_type': 'text', 'nullable': True}, '_dlt_load_id': {'name': '_dlt_load_id', 'data_type': 'text', 'nullable': False}, '_dlt_id': {'name': '_dlt_id', 'data_type': 'text', 'nullable': False, 'unique': True, 'row_key': True}}, 'name': 'file_metadata', 'write_disposition': 'merge', 'resource': 'data_resources', 'x-normalizer': {'seen-data': True}}, '_dlt_pipeline_state': {'columns': {'version': {'name': 'version', 'data_type': 'bigint', 'nullable': False}, 'engine_version': {'name': 'engine_version', 'data_type': 'bigint', 'nullable': False}, 'pipeline_name': {'name': 'pipeline_name', 'data_type': 'text', 'nullable': False}, 'state': {'name': 'state', 'data_type': 'text', 'nullable': False}, 'created_at': {'name': 'created_at', 'data_type': 'timestamp', 'nullable': False}, 'version_hash': {'name': 'version_hash', 'data_type': 'text', 'nullable': True}, '_dlt_load_id': {'name': '_dlt_load_id', 'data_type': 'text', 'nullable': False}, '_dlt_id': {'name': '_dlt_id', 'data_type': 'text', 'nullable': False, 'unique': True, 'row_key': True}}, 'write_disposition': 'append', 'file_format': 'preferred', 'name': '_dlt_pipeline_state', 'resource': '_dlt_pipeline_state', 'x-normalizer': {'seen-data': True}}, '_dlt_version': {'name': '_dlt_version', 'columns': {'version': {'name': 'version', 'data_type': 'bigint', 'nullable': False}, 'engine_version': {'name': 'engine_version', 'data_type': 'bigint', 'nullable': False}, 'inserted_at': {'name': 'inserted_at', 'data_type': 'timestamp', 'nullable': False}, 'schema_name': {'name': 'schema_name', 'data_type': 'text', 'nullable': False}, 'version_hash': {'name': 'version_hash', 'data_type': 'text', 'nullable': False}, 'schema': {'name': 'schema', 'data_type': 'text', 'nullable': False}}, 'write_disposition': 'skip', 'resource': '_dlt_version', 'description': 'Created by DLT. Tracks schema updates'}}, completed_at=DateTime(2024, 9, 20, 5, 11, 50, 264990, tzinfo=Timezone('UTC')), jobs={'completed_jobs': [LoadJobInfo(state='completed_jobs', file_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/loaded/1726809108.767254/completed_jobs/_dlt_pipeline_state.a91de88b58.0.typed-jsonl', file_size=514, created_at=DateTime(2024, 9, 20, 5, 11, 48, 927162, tzinfo=Timezone('UTC')), elapsed=1.3378276824951172, job_file_info=ParsedLoadJobFileName(table_name='_dlt_pipeline_state', file_id='a91de88b58', retry_count=0, file_format='typed-jsonl'), failed_message=None), LoadJobInfo(state='completed_jobs', file_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/loaded/1726809108.767254/completed_jobs/file_metadata.a639414384.0.typed-jsonl', file_size=668, created_at=DateTime(2024, 9, 20, 5, 11, 48, 927453, tzinfo=Timezone('UTC')), elapsed=1.3375375270843506, job_file_info=ParsedLoadJobFileName(table_name='file_metadata', file_id='a639414384', retry_count=0, file_format='typed-jsonl'), failed_message=None)], 'started_jobs': [], 'new_jobs': [], 'failed_jobs': []})]\n", - "True\n" + "['1726840972.7469912']\n", + "[LoadPackageInfo(load_id='1726840972.7469912', package_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/loaded/1726840972.7469912', state='loaded', schema=Schema file_load_from_filesystem at 6038968784, schema_update={}, completed_at=DateTime(2024, 9, 20, 14, 2, 54, 235948, tzinfo=Timezone('UTC')), jobs={'failed_jobs': [], 'started_jobs': [], 'completed_jobs': [LoadJobInfo(state='completed_jobs', file_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/loaded/1726840972.7469912/completed_jobs/file_metadata.a5a9170f4c.0.typed-jsonl', file_size=670, created_at=DateTime(2024, 9, 20, 14, 2, 52, 881922, tzinfo=Timezone('UTC')), elapsed=1.3540260791778564, job_file_info=ParsedLoadJobFileName(table_name='file_metadata', file_id='a5a9170f4c', retry_count=0, file_format='typed-jsonl'), failed_message=None)], 'new_jobs': []})]\n", + "False\n" ] } ], - "execution_count": 9 + "execution_count": 8 }, { - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-09-20T14:02:55.564445Z", + "start_time": "2024-09-20T14:02:55.562784Z" + } + }, "cell_type": "code", + "source": "", + "id": "6f9b564de121713d", "outputs": [], - "execution_count": null, - "source": [ - "from typing import Any, Dict, List, Optional, Union\n", - "from pydantic import BaseModel, Field\n", - "from uuid import uuid4\n", - "\n", - "class Node(BaseModel):\n", - " \"\"\"Node in a knowledge graph.\"\"\"\n", - " id: str = Field(default_factory=lambda: str(uuid4()))\n", - " name: Optional[str] = None\n", - " types: List[str] = Field(default_factory=list, description=\"Types or categories of the node (e.g., 'Person', 'Skill').\")\n", - " properties: Dict[str, Any] = Field(default_factory=dict, description=\"Properties associated with the node.\")\n", - "\n", - " def merge(self, other: 'Node'):\n", - " \"\"\"Merge another node into this one.\"\"\"\n", - " # Combine types\n", - " self.types = list(set(self.types + other.types))\n", - " # Combine properties\n", - " self.properties.update(other.properties)\n", - " # Update name if necessary\n", - " if not self.name and other.name:\n", - " self.name = other.name\n", - "\n", - "class Edge(BaseModel):\n", - " \"\"\"Edge in a knowledge graph.\"\"\"\n", - " id: str = Field(default_factory=lambda: str(uuid4()))\n", - " source_node_id: str\n", - " target_node_id: str\n", - " relationship_types: List[str] = Field(default_factory=list, description=\"Types of relationships (e.g., 'has_skill').\")\n", - " properties: Dict[str, Any] = Field(default_factory=dict, description=\"Properties associated with the edge.\")\n", - " confidence: Optional[float] = Field(None, description=\"Confidence score for the relationship.\")\n", - "\n", - "class KnowledgeGraph(BaseModel):\n", - " \"\"\"Knowledge graph.\"\"\"\n", - " nodes: Dict[str, Node] = Field(default_factory=dict)\n", - " edges: List[Edge] = Field(default_factory=list)\n", - "\n", - " def add_node(self, node: Node):\n", - " if node.id in self.nodes:\n", - " self.nodes[node.id].merge(node)\n", - " else:\n", - " self.nodes[node.id] = node\n", - "\n", - " def add_edge(self, edge: Edge):\n", - " self.edges.append(edge)" - ], - "id": "6f9b564de121713d" + "execution_count": 8 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:12:24.360343Z", - "start_time": "2024-09-20T05:12:24.335507Z" + "end_time": "2024-09-20T14:02:56.714408Z", + "start_time": "2024-09-20T14:02:56.711812Z" } }, "cell_type": "code", "source": [ - "from enum import Enum, auto\n", - "from typing import Optional, List, Union, Dict, Any\n", - "from pydantic import BaseModel, Field\n", - "\n", - "class Node(BaseModel):\n", - " \"\"\"Node in a knowledge graph.\"\"\"\n", - " id: str\n", - " name: str\n", - " type: str\n", - " description: str\n", - " properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the node.\")\n", - "\n", - "class Edge(BaseModel):\n", - " \"\"\"Edge in a knowledge graph.\"\"\"\n", - " source_node_id: str\n", - " target_node_id: str\n", - " relationship_name: str\n", - " properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the edge.\")\n", - "\n", - "class KnowledgeGraph(BaseModel):\n", - " \"\"\"Knowledge graph.\"\"\"\n", - " nodes: List[Node] = Field(..., default_factory=list)\n", - " edges: List[Edge] = Field(..., default_factory=list)" + "# from enum import Enum, auto\n", + "# from typing import Optional, List, Union, Dict, Any\n", + "# from pydantic import BaseModel, Field\n", + "# \n", + "# class Node(BaseModel):\n", + "# \"\"\"Node in a knowledge graph.\"\"\"\n", + "# id: str\n", + "# name: str\n", + "# type: str\n", + "# description: str\n", + "# properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the node.\")\n", + "# \n", + "# class Edge(BaseModel):\n", + "# \"\"\"Edge in a knowledge graph.\"\"\"\n", + "# source_node_id: str\n", + "# target_node_id: str\n", + "# relationship_name: str\n", + "# properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the edge.\")\n", + "# \n", + "# class KnowledgeGraph(BaseModel):\n", + "# \"\"\"Knowledge graph.\"\"\"\n", + "# nodes: List[Node] = Field(..., default_factory=list)\n", + "# edges: List[Edge] = Field(..., default_factory=list)" ], "id": "8911f8bd4f8c440a", "outputs": [], - "execution_count": 10 + "execution_count": 9 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:36:49.096914Z", - "start_time": "2024-09-20T05:36:49.089721Z" + "end_time": "2024-09-20T14:02:57.925667Z", + "start_time": "2024-09-20T14:02:57.922353Z" } }, "cell_type": "code", @@ -457,7 +418,9 @@ " root_node_id = None\n", "\n", " tasks = [\n", + " \n", " Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n", + " Task(infer_data_ontology, root_node_id = root_node_id, ontology_model = KnowledgeGraph),\n", " Task(source_documents_to_chunks, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type\n", " Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = \"entities\", task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks and attach it to chunk nodes\n", " Task(chunk_update_check, collection_name = \"chunks\"), # Find all affected chunks, so we don't process unchanged chunks\n", @@ -477,16 +440,81 @@ ], "id": "7c431fdef4921ae0", "outputs": [], - "execution_count": 25 + "execution_count": 10 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-09-20T05:36:49.776759Z", - "start_time": "2024-09-20T05:36:49.773859Z" + "end_time": "2024-09-20T14:02:58.905774Z", + "start_time": "2024-09-20T14:02:58.625915Z" } }, "cell_type": "code", + "source": [ + "user = await get_default_user()\n", + "datasets = await get_datasets_by_name([\"example\"], user.id)\n", + "await run_cognify_pipeline(datasets[0], user)" + ], + "id": "f0a91b99c6215e09", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Error occurred while running async generator task: `infer_data_ontology`\n", + "'Data' object has no attribute 'file_path'\n", + "Traceback (most recent call last):\n", + " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py\", line 26, in run_tasks\n", + " async for partial_result in async_iterator:\n", + " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/tasks/infer_data_ontology/infer_data_ontology.py\", line 176, in infer_data_ontology\n", + " root_node_id = await ontology_engine.add_graph_ontology(documents = documents)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/tasks/infer_data_ontology/infer_data_ontology.py\", line 92, in add_graph_ontology\n", + " with open(base_file.file_path, \"rb\") as file:\n", + " ^^^^^^^^^^^^^^^^^^^\n", + "AttributeError: 'Data' object has no attribute 'file_path'Error occurred while running coroutine task: `check_permissions_on_documents`\n", + "'Data' object has no attribute 'file_path'\n", + "Traceback (most recent call last):\n", + " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py\", line 86, in run_tasks\n", + " async for result in run_tasks(leftover_tasks, task_result):\n", + " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py\", line 49, in run_tasks\n", + " raise error\n", + " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py\", line 26, in run_tasks\n", + " async for partial_result in async_iterator:\n", + " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/tasks/infer_data_ontology/infer_data_ontology.py\", line 176, in infer_data_ontology\n", + " root_node_id = await ontology_engine.add_graph_ontology(documents = documents)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/tasks/infer_data_ontology/infer_data_ontology.py\", line 92, in add_graph_ontology\n", + " with open(base_file.file_path, \"rb\") as file:\n", + " ^^^^^^^^^^^^^^^^^^^\n", + "AttributeError: 'Data' object has no attribute 'file_path'" + ] + }, + { + "ename": "AttributeError", + "evalue": "'Data' object has no attribute 'file_path'", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[11], line 3\u001B[0m\n\u001B[1;32m 1\u001B[0m user \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m get_default_user()\n\u001B[1;32m 2\u001B[0m datasets \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m get_datasets_by_name([\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mexample\u001B[39m\u001B[38;5;124m\"\u001B[39m], user\u001B[38;5;241m.\u001B[39mid)\n\u001B[0;32m----> 3\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m run_cognify_pipeline(datasets[\u001B[38;5;241m0\u001B[39m], user)\n", + "Cell \u001B[0;32mIn[10], line 27\u001B[0m, in \u001B[0;36mrun_cognify_pipeline\u001B[0;34m(dataset, user)\u001B[0m\n\u001B[1;32m 25\u001B[0m \u001B[38;5;28mprint\u001B[39m(result)\n\u001B[1;32m 26\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m error:\n\u001B[0;32m---> 27\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m error\n", + "Cell \u001B[0;32mIn[10], line 24\u001B[0m, in \u001B[0;36mrun_cognify_pipeline\u001B[0;34m(dataset, user)\u001B[0m\n\u001B[1;32m 8\u001B[0m tasks \u001B[38;5;241m=\u001B[39m [\n\u001B[1;32m 9\u001B[0m \n\u001B[1;32m 10\u001B[0m Task(check_permissions_on_documents, user \u001B[38;5;241m=\u001B[39m user, permissions \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mwrite\u001B[39m\u001B[38;5;124m\"\u001B[39m]),\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 19\u001B[0m Task(chunk_remove_disconnected), \u001B[38;5;66;03m# Remove the obsolete document chunks.\u001B[39;00m\n\u001B[1;32m 20\u001B[0m ]\n\u001B[1;32m 22\u001B[0m pipeline \u001B[38;5;241m=\u001B[39m run_tasks(tasks, data_documents)\n\u001B[0;32m---> 24\u001B[0m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mfor\u001B[39;00m result \u001B[38;5;129;01min\u001B[39;00m pipeline:\n\u001B[1;32m 25\u001B[0m \u001B[38;5;28mprint\u001B[39m(result)\n\u001B[1;32m 26\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m error:\n", + "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py:97\u001B[0m, in \u001B[0;36mrun_tasks\u001B[0;34m(tasks, data)\u001B[0m\n\u001B[1;32m 90\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m error:\n\u001B[1;32m 91\u001B[0m logger\u001B[38;5;241m.\u001B[39merror(\n\u001B[1;32m 92\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mError occurred while running coroutine task: `\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m`\u001B[39m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m\"\u001B[39m,\n\u001B[1;32m 93\u001B[0m running_task\u001B[38;5;241m.\u001B[39mexecutable\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m,\n\u001B[1;32m 94\u001B[0m \u001B[38;5;28mstr\u001B[39m(error),\n\u001B[1;32m 95\u001B[0m exc_info \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m,\n\u001B[1;32m 96\u001B[0m )\n\u001B[0;32m---> 97\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m error\n\u001B[1;32m 99\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m inspect\u001B[38;5;241m.\u001B[39misfunction(running_task\u001B[38;5;241m.\u001B[39mexecutable):\n\u001B[1;32m 100\u001B[0m logger\u001B[38;5;241m.\u001B[39minfo(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mRunning function task: `\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m`\u001B[39m\u001B[38;5;124m\"\u001B[39m, running_task\u001B[38;5;241m.\u001B[39mexecutable\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m)\n", + "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py:86\u001B[0m, in \u001B[0;36mrun_tasks\u001B[0;34m(tasks, data)\u001B[0m\n\u001B[1;32m 83\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m 84\u001B[0m task_result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m running_task\u001B[38;5;241m.\u001B[39mrun(\u001B[38;5;241m*\u001B[39margs)\n\u001B[0;32m---> 86\u001B[0m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mfor\u001B[39;00m result \u001B[38;5;129;01min\u001B[39;00m run_tasks(leftover_tasks, task_result):\n\u001B[1;32m 87\u001B[0m \u001B[38;5;28;01myield\u001B[39;00m result\n\u001B[1;32m 89\u001B[0m logger\u001B[38;5;241m.\u001B[39minfo(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mFinished coroutine task: `\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m`\u001B[39m\u001B[38;5;124m\"\u001B[39m, running_task\u001B[38;5;241m.\u001B[39mexecutable\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m)\n", + "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py:49\u001B[0m, in \u001B[0;36mrun_tasks\u001B[0;34m(tasks, data)\u001B[0m\n\u001B[1;32m 42\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m error:\n\u001B[1;32m 43\u001B[0m logger\u001B[38;5;241m.\u001B[39merror(\n\u001B[1;32m 44\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mError occurred while running async generator task: `\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m`\u001B[39m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m\"\u001B[39m,\n\u001B[1;32m 45\u001B[0m running_task\u001B[38;5;241m.\u001B[39mexecutable\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m,\n\u001B[1;32m 46\u001B[0m \u001B[38;5;28mstr\u001B[39m(error),\n\u001B[1;32m 47\u001B[0m exc_info \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m,\n\u001B[1;32m 48\u001B[0m )\n\u001B[0;32m---> 49\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m error\n\u001B[1;32m 51\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m inspect\u001B[38;5;241m.\u001B[39misgeneratorfunction(running_task\u001B[38;5;241m.\u001B[39mexecutable):\n\u001B[1;32m 52\u001B[0m logger\u001B[38;5;241m.\u001B[39minfo(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mRunning generator task: `\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m`\u001B[39m\u001B[38;5;124m\"\u001B[39m, running_task\u001B[38;5;241m.\u001B[39mexecutable\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m)\n", + "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py:26\u001B[0m, in \u001B[0;36mrun_tasks\u001B[0;34m(tasks, data)\u001B[0m\n\u001B[1;32m 22\u001B[0m results \u001B[38;5;241m=\u001B[39m []\n\u001B[1;32m 24\u001B[0m async_iterator \u001B[38;5;241m=\u001B[39m running_task\u001B[38;5;241m.\u001B[39mrun(\u001B[38;5;241m*\u001B[39margs)\n\u001B[0;32m---> 26\u001B[0m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mfor\u001B[39;00m partial_result \u001B[38;5;129;01min\u001B[39;00m async_iterator:\n\u001B[1;32m 27\u001B[0m results\u001B[38;5;241m.\u001B[39mappend(partial_result)\n\u001B[1;32m 29\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(results) \u001B[38;5;241m==\u001B[39m next_task_batch_size:\n", + "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/tasks/infer_data_ontology/infer_data_ontology.py:176\u001B[0m, in \u001B[0;36minfer_data_ontology\u001B[0;34m(documents, ontology_model, root_node_id)\u001B[0m\n\u001B[1;32m 174\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m ontology_model \u001B[38;5;241m==\u001B[39m KnowledgeGraph:\n\u001B[1;32m 175\u001B[0m ontology_engine \u001B[38;5;241m=\u001B[39m OntologyEngine()\n\u001B[0;32m--> 176\u001B[0m root_node_id \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m ontology_engine\u001B[38;5;241m.\u001B[39madd_graph_ontology(documents \u001B[38;5;241m=\u001B[39m documents)\n\u001B[1;32m 177\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 178\u001B[0m graph_engine \u001B[38;5;241m=\u001B[39m get_graph_engine()\n", + "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/tasks/infer_data_ontology/infer_data_ontology.py:92\u001B[0m, in \u001B[0;36mOntologyEngine.add_graph_ontology\u001B[0;34m(self, file_path, documents)\u001B[0m\n\u001B[1;32m 89\u001B[0m chunk_strategy \u001B[38;5;241m=\u001B[39m chunk_config\u001B[38;5;241m.\u001B[39mchunk_strategy\n\u001B[1;32m 91\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m base_file \u001B[38;5;129;01min\u001B[39;00m documents:\n\u001B[0;32m---> 92\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mopen\u001B[39m(\u001B[43mbase_file\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfile_path\u001B[49m, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mrb\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;28;01mas\u001B[39;00m file:\n\u001B[1;32m 93\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m 94\u001B[0m file_type \u001B[38;5;241m=\u001B[39m guess_file_type(file)\n", + "\u001B[0;31mAttributeError\u001B[0m: 'Data' object has no attribute 'file_path'" + ] + } + ], + "execution_count": 11 + }, + { + "metadata": {}, + "cell_type": "code", "source": [ "\n", "\n", @@ -524,75 +552,23 @@ ], "id": "e7d4f03f7dab9807", "outputs": [], - "execution_count": 26 + "execution_count": null }, { - "metadata": { - "ExecuteTime": { - "end_time": "2024-09-20T05:36:50.641924Z", - "start_time": "2024-09-20T05:36:50.554094Z" - } - }, + "metadata": {}, "cell_type": "code", "source": "await cognify(\"example\")", "id": "d9248a01352964e2", - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Error occurred while running async generator task: `source_documents_to_chunks`\n", - "too many values to unpack (expected 2)\n", - "Traceback (most recent call last):\n", - " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py\", line 26, in run_tasks\n", - " async for partial_result in async_iterator:\n", - " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py\", line 9, in source_documents_to_chunks\n", - " documents, parent_node_id = documents\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "ValueError: too many values to unpack (expected 2)Error occurred while running coroutine task: `check_permissions_on_documents`\n", - "too many values to unpack (expected 2)\n", - "Traceback (most recent call last):\n", - " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py\", line 86, in run_tasks\n", - " async for result in run_tasks(leftover_tasks, task_result):\n", - " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py\", line 49, in run_tasks\n", - " raise error\n", - " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py\", line 26, in run_tasks\n", - " async for partial_result in async_iterator:\n", - " File \"/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py\", line 9, in source_documents_to_chunks\n", - " documents, parent_node_id = documents\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^\n", - "ValueError: too many values to unpack (expected 2)" - ] - }, - { - "ename": "ValueError", - "evalue": "too many values to unpack (expected 2)", - "output_type": "error", - "traceback": [ - "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[0;32mIn[27], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m cognify(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mexample\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n", - "Cell \u001B[0;32mIn[26], line 30\u001B[0m, in \u001B[0;36mcognify\u001B[0;34m(datasets, user)\u001B[0m\n\u001B[1;32m 27\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m dataset_name \u001B[38;5;129;01min\u001B[39;00m existing_datasets_map:\n\u001B[1;32m 28\u001B[0m awaitables\u001B[38;5;241m.\u001B[39mappend(run_cognify_pipeline(dataset, user))\n\u001B[0;32m---> 30\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;01mawait\u001B[39;00m asyncio\u001B[38;5;241m.\u001B[39mgather(\u001B[38;5;241m*\u001B[39mawaitables)\n", - "Cell \u001B[0;32mIn[25], line 24\u001B[0m, in \u001B[0;36mrun_cognify_pipeline\u001B[0;34m(dataset, user)\u001B[0m\n\u001B[1;32m 22\u001B[0m \u001B[38;5;28mprint\u001B[39m(result)\n\u001B[1;32m 23\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m error:\n\u001B[0;32m---> 24\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m error\n", - "Cell \u001B[0;32mIn[25], line 21\u001B[0m, in \u001B[0;36mrun_cognify_pipeline\u001B[0;34m(dataset, user)\u001B[0m\n\u001B[1;32m 7\u001B[0m tasks \u001B[38;5;241m=\u001B[39m [\n\u001B[1;32m 8\u001B[0m Task(check_permissions_on_documents, user \u001B[38;5;241m=\u001B[39m user, permissions \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mwrite\u001B[39m\u001B[38;5;124m\"\u001B[39m]),\n\u001B[1;32m 9\u001B[0m Task(source_documents_to_chunks, parent_node_id \u001B[38;5;241m=\u001B[39m root_node_id), \u001B[38;5;66;03m# Classify documents and save them as a nodes in graph db, extract text chunks based on the document type\u001B[39;00m\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 16\u001B[0m Task(chunk_remove_disconnected), \u001B[38;5;66;03m# Remove the obsolete document chunks.\u001B[39;00m\n\u001B[1;32m 17\u001B[0m ]\n\u001B[1;32m 19\u001B[0m pipeline \u001B[38;5;241m=\u001B[39m run_tasks(tasks, data_documents)\n\u001B[0;32m---> 21\u001B[0m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mfor\u001B[39;00m result \u001B[38;5;129;01min\u001B[39;00m pipeline:\n\u001B[1;32m 22\u001B[0m \u001B[38;5;28mprint\u001B[39m(result)\n\u001B[1;32m 23\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m error:\n", - "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py:97\u001B[0m, in \u001B[0;36mrun_tasks\u001B[0;34m(tasks, data)\u001B[0m\n\u001B[1;32m 90\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m error:\n\u001B[1;32m 91\u001B[0m logger\u001B[38;5;241m.\u001B[39merror(\n\u001B[1;32m 92\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mError occurred while running coroutine task: `\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m`\u001B[39m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m\"\u001B[39m,\n\u001B[1;32m 93\u001B[0m running_task\u001B[38;5;241m.\u001B[39mexecutable\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m,\n\u001B[1;32m 94\u001B[0m \u001B[38;5;28mstr\u001B[39m(error),\n\u001B[1;32m 95\u001B[0m exc_info \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m,\n\u001B[1;32m 96\u001B[0m )\n\u001B[0;32m---> 97\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m error\n\u001B[1;32m 99\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m inspect\u001B[38;5;241m.\u001B[39misfunction(running_task\u001B[38;5;241m.\u001B[39mexecutable):\n\u001B[1;32m 100\u001B[0m logger\u001B[38;5;241m.\u001B[39minfo(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mRunning function task: `\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m`\u001B[39m\u001B[38;5;124m\"\u001B[39m, running_task\u001B[38;5;241m.\u001B[39mexecutable\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m)\n", - "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py:86\u001B[0m, in \u001B[0;36mrun_tasks\u001B[0;34m(tasks, data)\u001B[0m\n\u001B[1;32m 83\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m 84\u001B[0m task_result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m running_task\u001B[38;5;241m.\u001B[39mrun(\u001B[38;5;241m*\u001B[39margs)\n\u001B[0;32m---> 86\u001B[0m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mfor\u001B[39;00m result \u001B[38;5;129;01min\u001B[39;00m run_tasks(leftover_tasks, task_result):\n\u001B[1;32m 87\u001B[0m \u001B[38;5;28;01myield\u001B[39;00m result\n\u001B[1;32m 89\u001B[0m logger\u001B[38;5;241m.\u001B[39minfo(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mFinished coroutine task: `\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m`\u001B[39m\u001B[38;5;124m\"\u001B[39m, running_task\u001B[38;5;241m.\u001B[39mexecutable\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m)\n", - "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py:49\u001B[0m, in \u001B[0;36mrun_tasks\u001B[0;34m(tasks, data)\u001B[0m\n\u001B[1;32m 42\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mException\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m error:\n\u001B[1;32m 43\u001B[0m logger\u001B[38;5;241m.\u001B[39merror(\n\u001B[1;32m 44\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mError occurred while running async generator task: `\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m`\u001B[39m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m\"\u001B[39m,\n\u001B[1;32m 45\u001B[0m running_task\u001B[38;5;241m.\u001B[39mexecutable\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m,\n\u001B[1;32m 46\u001B[0m \u001B[38;5;28mstr\u001B[39m(error),\n\u001B[1;32m 47\u001B[0m exc_info \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m,\n\u001B[1;32m 48\u001B[0m )\n\u001B[0;32m---> 49\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m error\n\u001B[1;32m 51\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m inspect\u001B[38;5;241m.\u001B[39misgeneratorfunction(running_task\u001B[38;5;241m.\u001B[39mexecutable):\n\u001B[1;32m 52\u001B[0m logger\u001B[38;5;241m.\u001B[39minfo(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mRunning generator task: `\u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m`\u001B[39m\u001B[38;5;124m\"\u001B[39m, running_task\u001B[38;5;241m.\u001B[39mexecutable\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m)\n", - "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/modules/pipelines/operations/run_tasks.py:26\u001B[0m, in \u001B[0;36mrun_tasks\u001B[0;34m(tasks, data)\u001B[0m\n\u001B[1;32m 22\u001B[0m results \u001B[38;5;241m=\u001B[39m []\n\u001B[1;32m 24\u001B[0m async_iterator \u001B[38;5;241m=\u001B[39m running_task\u001B[38;5;241m.\u001B[39mrun(\u001B[38;5;241m*\u001B[39margs)\n\u001B[0;32m---> 26\u001B[0m \u001B[38;5;28;01masync\u001B[39;00m \u001B[38;5;28;01mfor\u001B[39;00m partial_result \u001B[38;5;129;01min\u001B[39;00m async_iterator:\n\u001B[1;32m 27\u001B[0m results\u001B[38;5;241m.\u001B[39mappend(partial_result)\n\u001B[1;32m 29\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(results) \u001B[38;5;241m==\u001B[39m next_task_batch_size:\n", - "File \u001B[0;32m~/Projects/cognee/.venv/lib/python3.11/site-packages/cognee/tasks/source_documents_to_chunks/source_documents_to_chunks.py:9\u001B[0m, in \u001B[0;36msource_documents_to_chunks\u001B[0;34m(documents, parent_node_id)\u001B[0m\n\u001B[1;32m 6\u001B[0m graph_engine \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m get_graph_engine()\n\u001B[1;32m 8\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m parent_node_id \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m----> 9\u001B[0m documents, parent_node_id \u001B[38;5;241m=\u001B[39m documents\n\u001B[1;32m 12\u001B[0m nodes \u001B[38;5;241m=\u001B[39m []\n\u001B[1;32m 13\u001B[0m edges \u001B[38;5;241m=\u001B[39m []\n", - "\u001B[0;31mValueError\u001B[0m: too many values to unpack (expected 2)" - ] - } - ], - "execution_count": 27 + "outputs": [], + "execution_count": null }, { "metadata": {}, "cell_type": "code", - "outputs": [], - "execution_count": null, "source": "", - "id": "6afc6307bd115dbe" + "id": "6afc6307bd115dbe", + "outputs": [], + "execution_count": null } ], "metadata": {