Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/cog 539 implementing additional retriever approaches #262

Merged
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
475133a
fix: refactor get_graph_from_model to return nodes and edges correctly
borisarzentar Dec 5, 2024
20968be
fix: add missing params
borisarzentar Dec 5, 2024
11055c0
fix: remove complex zip usage
borisarzentar Dec 5, 2024
f36fe70
fix: add edges to data_point properties
borisarzentar Dec 5, 2024
33cdca5
Merge remote-tracking branch 'origin/main' into fix/graph-extraction-…
borisarzentar Dec 5, 2024
3f04dbb
fix: handle rate limit error coming from llm model
borisarzentar Dec 5, 2024
461ae00
fix: fixes lost edges and nodes in get_graph_from_model
hajdul88 Dec 5, 2024
8cbac09
fix: fixes database pruning issue in pgvector
hajdul88 Dec 6, 2024
f4403f1
fix: fixes database pruning issue in pgvector (#261)
hajdul88 Dec 6, 2024
0128530
feat: adds code summary embeddings to vector DB
hajdul88 Dec 6, 2024
071aa66
Merge branch 'fix/graph-extraction-from-model' into feature/cog-539-i…
hajdul88 Dec 6, 2024
3856712
Merge pull request #264 from topoteretes/gh-actions-all-branches
hajdul88 Dec 6, 2024
2c89e70
fix: cognee_demo notebook pipeline is not saving summaries
borisarzentar Dec 6, 2024
ed24107
Merge remote-tracking branch 'origin/main' into fix/graph-extraction-…
borisarzentar Dec 6, 2024
c2d1057
Merge remote-tracking branch 'origin/fix/graph-extraction-from-model'…
borisarzentar Dec 6, 2024
6ade117
Merge branch 'fix/graph-extraction-from-model' into feature/cog-539-i…
hajdul88 Dec 6, 2024
fb0f97c
Merge branch 'main' into feature/cog-539-implementing-additional-retr…
hajdul88 Dec 6, 2024
f4a0104
feat: implements first version of codegraph retriever
hajdul88 Dec 6, 2024
51b825e
Merge branch 'main' into feature/cog-539-implementing-additional-retr…
hajdul88 Dec 6, 2024
3ef3376
Merge branch 'feature/cog-539-implementing-additional-retriever-appro…
hajdul88 Dec 6, 2024
7b2dbf7
Merge branch 'main' into feature/cog-539-implementing-additional-retr…
hajdul88 Dec 9, 2024
af55ac5
chore: implements minor changes mostly to make the code production ready
hajdul88 Dec 9, 2024
3cbcfc1
fix: turns off raising duplicated edges unit test as we have these in…
hajdul88 Dec 9, 2024
1e3367d
feat: implements unit tests for description to codepart search
hajdul88 Dec 9, 2024
7edb5dd
fix: fixes edge property inconsistent access in codepart retriever
hajdul88 Dec 9, 2024
00a3668
chore: implements more precise typing for get_attribute method for co…
hajdul88 Dec 9, 2024
0a022ce
Merge branch 'main' into feature/cog-539-implementing-additional-retr…
hajdul88 Dec 10, 2024
a39e692
chore: adds spacing to tests and changes the cogneegraph getter names
hajdul88 Dec 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cognee/modules/graph/cognee_graph/CogneeGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def add_edge(self, edge: Edge) -> None:
edge.node1.add_skeleton_edge(edge)
edge.node2.add_skeleton_edge(edge)
else:
raise EntityAlreadyExistsError(message=f"Edge {edge} already exists in the graph.")
print(f"Edge {edge} already exists in the graph.")
hajdul88 marked this conversation as resolved.
Show resolved Hide resolved

def get_node(self, node_id: str) -> Node:
return self.nodes.get(node_id, None)
Expand Down
14 changes: 13 additions & 1 deletion cognee/modules/graph/cognee_graph/CogneeGraphElements.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ def add_attribute(self, key: str, value: Any) -> None:
def get_attribute(self, key: str) -> Union[str, int, float]:
return self.attributes[key]

def get_skeleton_edges(self):
return self.skeleton_edges

def get_skeleton_neighbours(self):
return self.skeleton_neighbours

def __repr__(self) -> str:
return f"Node({self.id}, attributes={self.attributes})"

Expand Down Expand Up @@ -109,9 +115,15 @@ def is_edge_alive_in_dimension(self, dimension: int) -> bool:
def add_attribute(self, key: str, value: Any) -> None:
self.attributes[key] = value

def get_attribute(self, key: str, value: Any) -> Union[str, int, float]:
def get_attribute(self, key: str) -> Union[str, int, float]:
return self.attributes[key]
hajdul88 marked this conversation as resolved.
Show resolved Hide resolved

def get_node_from(self):
hajdul88 marked this conversation as resolved.
Show resolved Hide resolved
return self.node1

def get_node_to(self):
return self.node2

def __repr__(self) -> str:
direction = "->" if self.directed else "--"
return f"Edge({self.node1.id} {direction} {self.node2.id}, attributes={self.attributes})"
Expand Down
96 changes: 96 additions & 0 deletions cognee/modules/retrieval/description_to_codepart_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import asyncio
import logging

from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
from cognee.modules.users.methods import get_default_user
from cognee.modules.users.models import User
from cognee.shared.utils import send_telemetry


async def code_description_to_code_part_search(query: str, user: User = None, top_k = 2) -> list:
    """
    Resolve the acting user, then run the description-to-code-part search.

    Args:
        query (str): The search query.
        user (User): The user performing the search. When None, the result of
            get_default_user() is used instead.
        top_k: The number of top results to retrieve (defaults to 2).

    Returns:
        Whatever code_description_to_code_part returns for the query.

    Raises:
        PermissionError: If no user was passed and get_default_user() yields None.
    """
    # Only fall back to the default user when the caller did not supply one.
    acting_user = user if user is not None else await get_default_user()
    if acting_user is None:
        raise PermissionError("No user found in the system. Please create a user.")

    return await code_description_to_code_part(query, acting_user, top_k)



async def code_description_to_code_part(
    query: str,
    user: User,
    top_k: int
) -> set:
    """
    Performs codegraph description to code part map for CodeGraph pipeline.

    The query is searched in the "code_summary_text" vector collection. For every
    hit, the projected graph is walked from the hit's node to its skeleton
    neighbours, and the target node of each neighbour edge whose
    'relationship_type' attribute is 'contains' is collected.

    Args:
        query (str): The search query. Must be a non-empty string.
        user (User): The user performing the search (its id is used for telemetry
            and error logging).
        top_k (int): The number of top results to retrieve. Must be positive.

    Returns:
        set: Deduplicated graph nodes of the code pieces corresponding to the query.

    Raises:
        ValueError: If query is empty or not a string, or top_k is not positive.
        RuntimeError: If the engines cannot be initialized or the search fails.
    """
    if not query or not isinstance(query, str):
        raise ValueError("The query must be a non-empty string.")
    if top_k <= 0:
        raise ValueError("top_k must be a positive integer.")

    try:
        vector_engine = get_vector_engine()
        graph_engine = await get_graph_engine()
    except Exception as e:
        logging.error("Failed to initialize engines: %s", e)
        raise RuntimeError("Initialization error") from e

    send_telemetry("code_description_to_code_part_search EXECUTION STARTED", user.id)

    try:
        results = await vector_engine.search("code_summary_text", query_text=query, limit=top_k)

        memory_fragment = CogneeGraph()

        await memory_fragment.project_graph_from_db(
            graph_engine,
            node_properties_to_project=['id', 'type', 'text', 'source_code'],
            edge_properties_to_project=['relationship_name'],
        )

        code_pieces_to_return = set()

        # TODO: This must change when the codegraph structure changes (and it will).
        # This initial version works well with the current implementation.
        for node in results:
            node_to_search_from = memory_fragment.get_node(str(node.id))

            # get_node returns None for ids missing from the projected graph; skip
            # such hits instead of failing the whole search with an AttributeError.
            if node_to_search_from is None:
                logging.warning(
                    "Vector search hit %s was not found in the projected graph; skipping.",
                    node.id,
                )
                continue

            for code_file in node_to_search_from.get_skeleton_neighbours():
                for code_file_edge in code_file.get_skeleton_edges():
                    if code_file_edge.get_attribute('relationship_type') == 'contains':
                        code_pieces_to_return.add(code_file_edge.get_node_to())

        return code_pieces_to_return

    except Exception as e:
        logging.error("Error during description to codepart search for user: %s, query: %s. Error: %s", user.id, query, e)
        send_telemetry("code_description_to_code_part_search EXECUTION FAILED", user.id)
        raise RuntimeError("An error occurred during description to codepart search") from e


if __name__ == "__main__":
    async def main():
        """Run a single demo search with no explicit user and print the outcome."""
        demo_query = "I am looking for a class with blue eyes"
        try:
            # Passing None lets the search fall back to the default user.
            code_parts = await code_description_to_code_part_search(demo_query, None)
            print("Retrieved Code Parts:", code_parts)
        except Exception as error:
            print(f"An error occurred: {error}")

    asyncio.run(main())


1 change: 1 addition & 0 deletions cognee/tasks/summarization/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class TextSummary(DataPoint):


class CodeSummary(DataPoint):
__tablename__ = "code_summary"
text: str
made_from: CodeFile

Expand Down
Loading