refactor: Add error handling to hash util
Added error handling to the file read in the hash util.

Refactor COG-505
dexters1 committed Dec 5, 2024
1 parent e80377b commit 1e098ae
Showing 3 changed files with 37 additions and 12 deletions.
9 changes: 9 additions & 0 deletions cognee/shared/exceptions/__init__.py
@@ -0,0 +1,9 @@
"""
Custom exceptions for the Cognee API.
This module defines a set of exceptions for handling various shared utility errors
"""

from .exceptions import (
IngestionError,
)
11 changes: 11 additions & 0 deletions cognee/shared/exceptions/exceptions.py
@@ -0,0 +1,11 @@
+from cognee.exceptions import CogneeApiError
+from fastapi import status
+
+class IngestionError(CogneeApiError):
+    def __init__(
+        self,
+        message: str = "Failed to load data.",
+        name: str = "IngestionError",
+        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+    ):
+        super().__init__(message, name, status_code)
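For context, a minimal sketch of how this exception type is meant to be used; the load_document helper below is hypothetical, and the attribute access at the end assumes CogneeApiError stores its constructor arguments:

from cognee.shared.exceptions import IngestionError

def load_document(path: str) -> bytes:
    # Hypothetical helper: wrap low-level I/O failures in the
    # domain-level IngestionError (HTTP 422 at the API boundary).
    try:
        with open(path, "rb") as file:
            return file.read()
    except IOError as error:
        raise IngestionError(message=f"Failed to load data from {path}: {error}")

try:
    load_document("missing.txt")
except IngestionError as error:
    print(error.name, error.status_code)  # assumes these attributes exist on CogneeApiError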
29 changes: 17 additions & 12 deletions cognee/shared/utils.py
@@ -19,6 +19,8 @@
 from uuid import uuid4
 import pathlib
 
+from cognee.shared.exceptions import IngestionError
+
 # Analytics Proxy Url, currently hosted by Vercel
 proxy_url = "https://test.prometh.ai"
 
@@ -76,23 +78,26 @@ def num_tokens_from_string(string: str, encoding_name: str) -> int:
 def get_file_content_hash(file_obj: Union[str, BinaryIO]) -> str:
     h = hashlib.md5()
 
-    if isinstance(file_obj, str):
-        with open(file_obj, 'rb') as file:
+    try:
+        if isinstance(file_obj, str):
+            with open(file_obj, 'rb') as file:
+                while True:
+                    # Reading is buffered, so we can read smaller chunks.
+                    chunk = file.read(h.block_size)
+                    if not chunk:
+                        break
+                    h.update(chunk)
+        else:
             while True:
                 # Reading is buffered, so we can read smaller chunks.
-                chunk = file.read(h.block_size)
+                chunk = file_obj.read(h.block_size)
                 if not chunk:
                     break
                 h.update(chunk)
-    else:
-        while True:
-            # Reading is buffered, so we can read smaller chunks.
-            chunk = file_obj.read(h.block_size)
-            if not chunk:
-                break
-            h.update(chunk)
 
-    return h.hexdigest()
-
+        return h.hexdigest()
+    except IOError as e:
+        raise IngestionError(message=f"Failed to load data from {file_obj}: {e}")
+
 def trim_text_to_max_tokens(text: str, max_tokens: int, encoding_name: str) -> str:
     """
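For reference, a minimal sketch of calling the updated helper; the path and in-memory stream are illustrative only:

import io

from cognee.shared.exceptions import IngestionError
from cognee.shared.utils import get_file_content_hash

# Hash an in-memory binary stream (the BinaryIO branch).
print(get_file_content_hash(io.BytesIO(b"hello")))  # 5d41402abc4b2a76b9719d911017c592

# An unreadable path now surfaces as IngestionError instead of a raw IOError.
try:
    get_file_content_hash("/no/such/file")
except IngestionError as error:
    print(error)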
