Shiny new LLMOps #110
Changes from all commits
16139f7
7ff87bb
f3a1d2e
2d09379
f322a6c
d8c6e6f
688d757
17f4b27
77e06e2
a8ed752
@@ -1,4 +1,5 @@
import asyncio
import os
from uuid import uuid4
from typing import List, Union
import logging
@@ -149,15 +150,23 @@ async def process_batch(files_batch):
graph_config = get_graph_config()
graph_topology = graph_config.graph_model
Review comment (Ruff): The local variable assigned by `graph_topology = graph_config.graph_model` is never used. Suggested change: remove the assignment.
print(graph_config.infer_graph_topology)
print(graph_config.graph_topology_task)

Review comment: Let's delete these print lines.
Review comment (on lines +153 to +155): Multiple print statements for debugging purposes can clutter the production code. Consider using a more robust logging approach that can be easily toggled or configured.

Suggested change:

- print(graph_config.infer_graph_topology)
- print(graph_config.graph_topology_task)
- print('infered topology added')
- print('provided topology added')
+ logger.debug(f"Infer graph topology: {graph_config.infer_graph_topology}")
+ logger.debug(f"Graph topology task: {graph_config.graph_topology_task}")
+ logger.debug("Inferred topology added")
+ logger.debug("Provided topology added")

Also applies to lines 162-163 and 168-169.

Reply: @Vasilije1990 This is a good suggestion.
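The suggested `logger.debug` calls assume a module-level logger has been configured. A minimal, self-contained sketch of that setup (names here are illustrative, not cognee's actual module):

```python
import logging

# Module-level logger; in a library, handler/level configuration would
# normally be left to the application entry point rather than done here.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

def report_topology(infer_graph_topology: bool, graph_topology_task: bool) -> None:
    # Debug-level messages replace the ad-hoc print() calls; raising the
    # root level to INFO or WARNING silences them without code changes.
    logger.debug("Infer graph topology: %s", infer_graph_topology)
    logger.debug("Graph topology task: %s", graph_topology_task)

report_topology(True, False)
```

Using `%s` placeholders instead of f-strings defers string formatting until the record is actually emitted, which avoids wasted work when debug logging is disabled.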
if graph_config.infer_graph_topology and graph_config.graph_topology_task:
    from cognee.modules.topology.topology import TopologyEngine
    topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology)
    await topology_engine.add_graph_topology(dataset_files=dataset_files)
    print('infered topology added')
    parent_node_id = None
elif not graph_config.infer_graph_topology:
    from cognee.modules.topology.topology import TopologyEngine
    topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology)
    await topology_engine.add_graph_topology(graph_config.topology_file_path)
    print('provided topology added')
    parent_node_id = None
elif not graph_config.graph_topology_task:
    parent_node_id = f"DefaultGraphModel__{USER_ID}"
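The three-way branch above could also be factored into a small helper. This is a hedged sketch of the control flow only, with a hypothetical stand-in for cognee's `TopologyEngine`:

```python
import asyncio

class TopologyEngine:
    """Hypothetical stand-in for cognee.modules.topology.topology.TopologyEngine."""
    def __init__(self, infer: bool):
        self.infer = infer

    async def add_graph_topology(self, source):
        # The real engine builds the graph topology from dataset files
        # or from a provided topology file; this just echoes its input.
        return f"topology from {source!r} (infer={self.infer})"

async def resolve_topology(infer, topology_task, topology_file_path,
                           dataset_files, user_id):
    """Mirror the branching above and return the parent node id."""
    if infer and topology_task:
        # Infer the topology from the dataset files themselves.
        await TopologyEngine(infer=infer).add_graph_topology(dataset_files)
        return None
    if not infer:
        # Load a user-provided topology file instead of inferring one.
        await TopologyEngine(infer=infer).add_graph_topology(topology_file_path)
        return None
    # No topology task configured: fall back to a default graph model node.
    return f"DefaultGraphModel__{user_id}"

print(asyncio.run(resolve_topology(True, False, None, None, "user-1")))
# prints DefaultGraphModel__user-1
```

Returning the parent node id from one place makes the fallthrough case explicit, where the original code leaves `parent_node_id` unset if none of the branches match.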
@@ -294,14 +303,30 @@ async def test():
from cognee.api.v1.add import add

dataset_name = "explanations"
Review comment (Ruff): Several local variables (`dataset_name`, `data_dir`, `graph`) are assigned but never used. Suggested change:

- dataset_name = "explanations"
- data_dir = os.path.abspath("../../../.data")
- graph = await cognify("explanations")

Also applies to lines 308 and 328.
print(os.getcwd())
data_dir = os.path.abspath("../../../.data")
print(os.getcwd())
from pathlib import Path
dir = Path.joinpath(Path.cwd(), ".data")

await add(f"data://{dir}", dataset_name="explanations")

await add([text], "example_dataset")
from cognee.api.v1.config.config import config
config.set_chunk_engine(ChunkEngine.LANGCHAIN_ENGINE)
config.set_chunk_strategy(ChunkStrategy.LANGCHAIN_CHARACTER)
config.embedding_engine = LiteLLMEmbeddingEngine()
config.set_chunk_engine(ChunkEngine.LANGCHAIN_ENGINE)
Review comment: Duplicate assignment: `config.set_chunk_engine(ChunkEngine.LANGCHAIN_ENGINE)` is called twice. Suggested change: remove the second call.

Reply: @Vasilije1990 This one.
config.set_graph_topology_task=True
config.set_infer_graph_topology=True

from cognee.api.v1.datasets.datasets import datasets
print(datasets.list_datasets())
Review comment: Printing the list of datasets might not be necessary in the production environment. Consider removing it or replacing it with a debug log.

Suggested change:

- print(datasets.list_datasets())
+ logger.debug("Available datasets: " + str(datasets.list_datasets()))

Reply: @Vasilije1990 This.
graph = await cognify("explanations")

graph = await cognify()
# vector_client = infrastructure_config.get_config("vector_engine")
#
# out = await vector_client.search(collection_name="basic_rag", query_text="show_all_processes", limit=10)
@@ -1,4 +1,6 @@
You are a topology master and need to extract the following topology information from the text provided to you.
Relationship parts can't be empty, and have to be logical AND CONNECTING ELEMENTS OF THE TOPOLOGY
The source is the parent of the target. And the target is the child of the source.
Have in mind this model needs to become a graph later, and USE EXISTING IDS AS NODE IDS
You are just connecting documents, you don't need to decompose the documents into words or anything like that.
Use document id as name if name is not present
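The prompt above asks the model to emit parent/child edges between documents. A hedged illustration of the kind of structure it describes (the field names are assumptions for illustration; the prompt itself does not pin down an exact schema):

```python
# Assumed edge shape: source is the parent of the target, and existing
# document ids are reused as node ids, as the prompt instructs.
topology_edges = [
    {"source": "doc-1", "target": "doc-2", "relationship": "references"},
    {"source": "doc-1", "target": "doc-3", "relationship": "summarizes"},
]

def node_name(node: dict) -> str:
    # Per the prompt: use the document id as the name if no name is present.
    return node.get("name") or node["id"]

print(node_name({"id": "doc-2"}))                   # prints doc-2
print(node_name({"id": "doc-2", "name": "Intro"}))  # prints Intro
```

The prompt's "relationship parts can't be empty" rule maps naturally onto a validation pass that rejects any edge with a missing source, target, or relationship.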
@@ -8,7 +8,7 @@ The goal of the blog is to discuss broader topics around the cognee project, inc

Review comment (Markdownlint): Remove consecutive blank lines. Multiple consecutive blank lines are unnecessary and should be collapsed to one to adhere to markdown best practices (line 8: expected 1, actual 2).
1. [LLMOps stack + Graphs](posts/Shiny_new_LLMOps.md)

[//]: # (2. [Where do knowledge graphs fit, and where do they not? A case study with dynamo.fyi](posts/where-do-knowledge-graphs-fit.md))
2. [Where do knowledge graphs fit, and where do they not? A case study with dynamo.fyi](posts/where-do-knowledge-graphs-fit-and-where-they-dont.md)

[//]: # (3. [Knowledge Graphs vs basic RAGs, some metrics](posts/knowledge-graphs-vs-basic-rags.md))
@@ -0,0 +1,85 @@
---
draft: False
date: 2024-07-03
tags:
  - dynamo.fyi
  - rag
  - deepeval
  - openai
  - functions
authors:
  - tricalt
---

# Cognee - Case study with Dynamo.fyi
Over the past three months, we've been developing Cognee, a self-improving data storage system for AI applications and agents. Since our initial library release, we have:

- Added OSS model support
- Extended graph database support
- Improved the codebase
- Added a simple, extendable web UI

![where-do-knowledge-graphs-fit/timeline.jpg](where-do-knowledge-graphs-fit/timeline.jpg)
We are still working on adding evaluations. We're also planning a library rewrite for better scalability, with many more vector and graph store extensions on the way.

## Future Directions

Instead of developing in isolation, we chose to collaborate with several design partners to build a solid foundation of practical examples of what works and what doesn't.

Recently, we worked with our design partner Dynamo.fyi on one of the first production deployments of Cognee. We summarize the results of this project in the following sections.

In previous integrations with keepi.ai and Luccid, we deployed Cognee on AWS, built a cognitive model, and provided a managed service. These initial integrations helped us iterate faster on the product and understand what we need to develop and build next.

As we iterated on the product and released a Python library, getting it into production became a priority. Since we envision cognee running on our users' infrastructure, we decided that our next step would be to work with Dynamo on deploying cognee on their cloud.

We took this approach deliberately because creating integrations and providing solutions on someone else's infrastructure is complex. It involves many considerations, such as cloud provider choices, permissions, and data handling. Questions about ingestion, security, and related concepts, such as guardrails, quickly follow.
## Helping Dynamo Interact With Users

Dynamo helps gaming companies interact with their user base. Agents communicate with users via messenger to offer bonuses and encourage participation in tournaments and other games. The company needed a personalization layer that would let agents interact optimally with users, offering specific suggestions based on purchase activity, recent events, and more.

We were tasked with building a naive recommender system that uses LLMs to suggest optimal responses. This required a feature store containing relevant user-level information, enriched with LLM-structured outputs. This data was then fed to Cognee to support agents with personalized messages and real-time analytics.

## From Naive RAG to GraphRAG

When we arrived at Dynamo, we found a rudimentary agent support system connected to OpenAI with a single templated query. Rather than building a GraphRAG immediately, we needed to replicate the existing functionality and create an infrastructure layer that could later evolve towards GraphRAG.
GraphRAG would provide a semantic and personalization layer above the Firebase store, where all customer data is located. However, this would only be useful if we had a system for releasing our agent support assistant to production.
Our initial work involved preparing a data layer, deployments, endpoints, and monitoring. Once we released the first version to production, our design partner started requesting features that necessitated a GraphRAG.

These features included timelines of recent vs. non-recent events, complex user enrichments, deterministic-to-non-deterministic data mapping provided by the LLM, complex logic creation, behavior analysis across sessions, and agent personality extraction.

These features will be implemented soon, which will allow us to evaluate our basic RAG against GraphRAG.
## Outcomes

In one month, we built, deployed, integrated, and tested a service that provided Dynamo agents with several new tools. These tools help agents analyze user transactions, suggest replies, and initiate conversations.

Here is a simplified interpretation of the flow:

![where-do-knowledge-graphs-fit/dynamo_fyi_demo.jpg](where-do-knowledge-graphs-fit/dynamo_fyi_demo.jpg)

We also ran evaluations comparing the old and new services, using the open-source tool DeepEval for visualization. The metrics used for evaluation, with AI as the judge, were:

- Answer Relevancy: measures the quality of the RAG pipeline's generator by evaluating how relevant the actual_output is compared to the provided input.
- Faithfulness: measures the quality of the RAG pipeline's generator by evaluating whether the actual_output factually aligns with the contents of the retrieval_context.
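To make the metric concrete, here is a hedged sketch of the shape of an answer-relevancy check. This is not DeepEval's actual API; the LLM judge is replaced by a trivial keyword-overlap score purely so the sketch is runnable:

```python
def answer_relevancy(question: str, actual_output: str) -> float:
    """Stand-in judge: fraction of question terms the answer touches.

    In a real evaluation an LLM assigns this score; keyword overlap is
    used here only as a runnable placeholder for the judging step.
    """
    q_terms = {t.lower().strip("?.,") for t in question.split()}
    a_terms = {t.lower().strip("?.,") for t in actual_output.split()}
    return len(q_terms & a_terms) / len(q_terms) if q_terms else 0.0

score = answer_relevancy(
    "What bonus did the user receive last week?",
    "The user received a deposit bonus last week after the tournament.",
)
assert 0.0 <= score <= 1.0  # scores are normalized to [0, 1]
```

Faithfulness works the same way structurally, except the judge compares the actual_output against the retrieval_context instead of the input question.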
## Results

The old service performed as follows:

The new service shows the following results on test cases at the user level, running on 55 manually generated questions based on previous User/Agent interactions:

![where-do-knowledge-graphs-fit/cognee_results.png](where-do-knowledge-graphs-fit/cognee_results.png)

Since in both cases we had structured data extracted from a relational store and provided as context, we did not face Faithfulness issues. On the other hand, we significantly improved the baseline Answer Relevancy.

![where-do-knowledge-graphs-fit/initial_results.png](where-do-knowledge-graphs-fit/initial_results.png)
Review comment: Consider consolidating imports from the same module on a single line for better readability.
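An example of the consolidation the comment suggests, using `typing` imports like those in the diff:

```python
# Before: separate imports from the same module
# from typing import List
# from typing import Union

# After: one consolidated import line
from typing import List, Union

def as_list(value: Union[str, List[str]]) -> List[str]:
    # Normalize a single string or a list of strings to a list.
    return [value] if isinstance(value, str) else list(value)

print(as_list("explanations"))  # prints ['explanations']
```

Tools such as Ruff's isort-compatible rules can apply this consolidation automatically across a codebase.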