Docs update #107

Merged
merged 4 commits into from Jun 9, 2024
40 changes: 37 additions & 3 deletions cognee/api/v1/topology/add_topology.py
@@ -1,10 +1,43 @@
import os
import json
import pandas as pd
from pydantic import BaseModel, Field
from typing import Any, Dict, List, Optional, Type, Union
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryModel
from cognee.infrastructure.databases.graph.config import get_graph_config


class Relationship(BaseModel):
type: str = Field(..., description="The type of relationship, e.g., 'belongs_to'.")
source: Optional[str] = Field(None, description="The identifier of the source node in the relationship, e.g. a directory or subdirectory.")
target: Optional[str] = Field(None, description="The identifier of the target node in the relationship, e.g. a directory, subdirectory, or file.")
properties: Optional[Dict[str, Any]] = Field(None, description="A dictionary of additional properties and values related to the relationship.")

class JSONEntity(BaseModel):
name: str
set_type_as: Optional[str] = None
property_columns: List[str]
description: Optional[str] = None
Comment on lines +25 to +29 (Contributor):
Consider adding validation for optional fields in JSONEntity to ensure data integrity.
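One way to act on this suggestion — shown here as a sketch with a stdlib dataclass stand-in rather than the pydantic models above, since validator syntax differs between pydantic versions — is to accept `None` for optional fields but reject empty or whitespace-only strings:

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class EntitySketch:
    # Hypothetical stand-in for JSONEntity; pydantic would express the
    # same checks as field validators on the model.
    name: str
    set_type_as: Optional[str] = None
    description: Optional[str] = None

    def __post_init__(self):
        # Optional fields may be absent (None), but if supplied they must
        # carry real content, not an empty or whitespace-only string.
        for field_name in ("set_type_as", "description"):
            value = getattr(self, field_name)
            if value is not None and not value.strip():
                raise ValueError(f"{field_name} must be None or non-empty")
```

The same rule would apply unchanged to the optional `description` field of JSONPattern below.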


class JSONPattern(BaseModel):
head: str
relation: str
tail: str
description: Optional[str] = None
Comment on lines +31 to +35 (Contributor):
Similar to JSONEntity, consider adding validation for optional fields in JSONPattern.


class JSONModel(BaseModel):
node_id: str
entities: List[JSONEntity]
patterns: List[JSONPattern]

USER_ID = "default_user"

async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
@@ -44,11 +77,12 @@ def flatten_repository(repo_model: BaseModel) -> List[Dict[str, Any]]:
""" Flatten the entire repository model, starting with the top-level model """
return recursive_flatten(repo_model)

flt_topology = flatten_repository(topology)
async def add_graph_topology():

flt_topology = flatten_repository(topology)

df = pd.DataFrame(flt_topology)
df = pd.DataFrame(flt_topology)
Comment on lines +80 to +84 (Contributor):
The add_graph_topology function is complex. Consider refactoring to improve readability and maintainability.

Ruff: 84-84: Local variable df is assigned to but never used (F841)

print(df.head(10))

for _, row in df.iterrows():
node_data = row.to_dict()
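The flatten-then-load pattern in this hunk — recursively walking a nested repository model into flat rows, then iterating those rows to create graph nodes — can be illustrated with plain dicts (a sketch only; the field names below are hypothetical, not the actual GitHubRepositoryModel schema):

```python
from typing import Any, Dict, List

def flatten_tree(node: Dict[str, Any], parent_id: str = "") -> List[Dict[str, Any]]:
    # Each node becomes one flat row; children recurse with a path-like id,
    # mirroring how flatten_repository linearises the repository model.
    node_id = f"{parent_id}/{node['name']}" if parent_id else node["name"]
    rows = [{"id": node_id, "type": node.get("type", "file")}]
    for child in node.get("children", []):
        rows.extend(flatten_tree(child, node_id))
    return rows

repo = {
    "name": "repo", "type": "directory",
    "children": [
        {"name": "src", "type": "directory",
         "children": [{"name": "main.py", "type": "file"}]},
    ],
}
rows = flatten_tree(repo)
# Each flat row can then be inserted as one graph node, as the loop over
# df.iterrows() does above.
```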
12 changes: 12 additions & 0 deletions cognee/infrastructure/data/chunking/LangchainChunkingEngine.py
@@ -29,6 +29,9 @@ def chunk_data(

if chunk_strategy == ChunkStrategy.CODE:
chunked_data = LangchainChunkEngine.chunk_data_by_code(source_data, chunk_size, chunk_overlap)

elif chunk_strategy == ChunkStrategy.LANGCHAIN_CHARACTER:
chunked_data = LangchainChunkEngine.chunk_data_by_character(source_data, chunk_size, chunk_overlap)
else:
chunked_data = DefaultChunkEngine.chunk_data_by_paragraph(source_data, chunk_size, chunk_overlap)
return chunked_data
@@ -50,3 +53,12 @@ def chunk_data_by_code(data_chunks, chunk_size, chunk_overlap, language=None):

return only_content

def chunk_data_by_character(self, data_chunks, chunk_size, chunk_overlap):
from langchain_text_splitters import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
data = splitter.create_documents([data_chunks])

only_content = [chunk.page_content for chunk in data]

return only_content
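For intuition about what the character strategy produces, a stripped-down splitter with overlap can be written in plain Python (an illustration only, not langchain's RecursiveCharacterTextSplitter, which additionally prefers splitting at separator boundaries):

```python
from typing import List

def split_by_character(text: str, chunk_size: int, chunk_overlap: int) -> List[str]:
    # Slide a fixed-size window over the text; consecutive chunks share
    # chunk_overlap characters so context spans chunk boundaries.
    if chunk_overlap >= chunk_size:
        raise ValueError("chunk_overlap must be smaller than chunk_size")
    step = chunk_size - chunk_overlap
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]

chunks = split_by_character("abcdefghij", chunk_size=4, chunk_overlap=2)
# chunks -> ["abcd", "cdef", "efgh", "ghij", "ij"]
```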
Comment on lines +56 to +63 (Contributor):
Well implemented method for character-based chunking. Consider adding unit tests to ensure its functionality.

Would you like me to help with writing the unit tests for this method?


1 change: 1 addition & 0 deletions cognee/shared/data_models.py
@@ -35,6 +35,7 @@ class ChunkStrategy(Enum):
PARAGRAPH = "paragraph"
SENTENCE = "sentence"
CODE = "code"
LANGCHAIN_CHARACTER = "langchain_character"

class MemorySummary(BaseModel):
""" Memory summary. """
1 change: 1 addition & 0 deletions docs/research.md
@@ -5,6 +5,7 @@ This page is dedicated to collecting research gathered in the past
This is not an exhaustive list, and PRs are welcome.

### Research Papers
- [2024/06/04] [Transformers and episodic memory](https://arxiv.org/abs/2405.14992)
- [2024/03/24] [Graph Chain-of-Thought: Augmenting Large Language Models by Reasoning on Graphs](https://arxiv.org/abs/2404.07103)
- [2024/03/24] [Leave No Context Behind: Efficient Infinite Context Transformers with Infini-attention](https://arxiv.org/abs/2404.07143)
- [2024/03/24] [Compound AI systems](https://bair.berkeley.edu/blog/2024/02/18/compound-ai-systems/)