-
Notifications
You must be signed in to change notification settings - Fork 93
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Docs update #107
Docs update #107
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,43 @@ | ||
import pandas as pd | ||
from pydantic import BaseModel | ||
|
||
from typing import List, Dict, Any, Union, Optional | ||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client | ||
from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryModel | ||
from cognee.infrastructure.databases.graph.config import get_graph_config | ||
|
||
import os | ||
import pandas as pd | ||
import json | ||
from pydantic import BaseModel, Field | ||
from typing import Dict, List, Optional, Union, Type, Any | ||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client | ||
|
||
|
||
|
||
|
||
class Relationship(BaseModel):
    # Describes one relationship: a required type plus optional source,
    # target and free-form properties.
    # Documented with comments instead of a docstring so the schema generated
    # for this model stays exactly as it was.

    # Required: the relationship label.
    type: str = Field(
        default=...,
        description="The type of relationship, e.g., 'belongs_to'.",
    )

    # Optional endpoints of the relationship; both default to None.
    source: Optional[str] = Field(
        default=None,
        description="The identifier of the source id of in the relationship being a directory or subdirectory",
    )
    target: Optional[str] = Field(
        default=None,
        description="The identifier of the target id in the relationship being the directory, subdirectory or file",
    )

    # Optional bag of extra key/value data attached to the relationship.
    properties: Optional[Dict[str, Any]] = Field(
        default=None,
        description="A dictionary of additional properties and values related to the relationship.",
    )
|
||
class JSONEntity(BaseModel):
    # One entity entry; JSONModel holds a list of these in its `entities` field.
    # Comments are used instead of a docstring so the generated schema is unchanged.
    name: str  # required entity name
    set_type_as: Optional[str] = None  # optional; presumably a type label to assign — TODO confirm with the consumer
    property_columns: List[str]  # required; presumably the column names used as properties — TODO confirm
    description: Optional[str] = None  # optional free-text description
|
||
class JSONPattern(BaseModel):
    # One pattern entry; JSONModel holds a list of these in its `patterns` field.
    # NOTE(review): the field names suggest a (head, relation, tail) triple — confirm with the consumer.
    # Comments are used instead of a docstring so the generated schema is unchanged.
    head: str  # required
    relation: str  # required
    tail: str  # required
    description: Optional[str] = None  # optional free-text description
Comment on lines
+31
to
+35
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to |
||
|
||
class JSONModel(BaseModel):
    # Top-level container: a node identifier plus lists of entities and patterns.
    # Comments are used instead of a docstring so the generated schema is unchanged.
    node_id: str  # required identifier
    entities: List[JSONEntity]  # required list of entity entries
    patterns: List[JSONPattern]  # required list of pattern entries
USER_ID = "default_user" | ||
|
||
async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any: | ||
|
@@ -44,11 +77,12 @@ def flatten_repository(repo_model: BaseModel) -> List[Dict[str, Any]]: | |
""" Flatten the entire repository model, starting with the top-level model """ | ||
return recursive_flatten(repo_model) | ||
|
||
flt_topology = flatten_repository(topology) | ||
async def add_graph_topology(): | ||
|
||
flt_topology = flatten_repository(topology) | ||
|
||
df = pd.DataFrame(flt_topology) | ||
df = pd.DataFrame(flt_topology) | ||
Comment on lines
+80
to
+84
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The ToolsRuff
|
||
|
||
print(df.head(10)) | ||
|
||
for _, row in df.iterrows(): | ||
node_data = row.to_dict() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,9 @@ def chunk_data( | |
|
||
if chunk_strategy == ChunkStrategy.CODE: | ||
chunked_data = LangchainChunkEngine.chunk_data_by_code(source_data,chunk_size, chunk_overlap) | ||
|
||
elif chunk_strategy == ChunkStrategy.LANGCHAIN_CHARACTER: | ||
chunked_data = LangchainChunkEngine.chunk_data_by_character(source_data,chunk_size, chunk_overlap) | ||
else: | ||
chunked_data = DefaultChunkEngine.chunk_data_by_paragraph(source_data,chunk_size, chunk_overlap) | ||
return chunked_data | ||
|
@@ -50,3 +53,12 @@ def chunk_data_by_code(data_chunks, chunk_size, chunk_overlap, language=None): | |
|
||
return only_content | ||
|
||
@staticmethod
def chunk_data_by_character(data_chunks, chunk_size, chunk_overlap):
    """Split text into overlapping chunks with a recursive character splitter.

    Declared as a static method (no ``self``) because ``chunk_data`` invokes it
    on the class itself with exactly these three arguments; calls made through
    an instance keep working as well.

    Args:
        data_chunks: The text to split. A single string, or an iterable of
            strings that are each split in turn.
        chunk_size: Maximum size of a chunk, as measured by the splitter.
        chunk_overlap: Amount of overlap between consecutive chunks.

    Returns:
        A list with the text content of every produced chunk, in order.
    """
    # Imported here so the dependency is only needed when this strategy is used.
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    # The sizes must be passed by keyword: the splitter's first positional
    # parameter is the separator list, not the chunk size.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    # create_documents expects a list of texts and returns document objects,
    # which is what the `.page_content` extraction below relies on.
    texts = [data_chunks] if isinstance(data_chunks, str) else list(data_chunks)
    documents = splitter.create_documents(texts)

    return [document.page_content for document in documents]
Comment on lines
+56
to
+63
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well-implemented method for character-based chunking. Consider adding unit tests to ensure its functionality. Would you like me to help with writing the unit tests for this method? |
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider adding validation for optional fields in `JSONEntity` to ensure data integrity.