From 1e098ae70d4449b53328157f8d8a86059f4f73da Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 5 Dec 2024 20:54:55 +0100 Subject: [PATCH] refactor: Add error handling to hash util Added error handling to reading of file in hash util Refactor COG-505 --- cognee/shared/exceptions/__init__.py | 9 ++++++++ cognee/shared/exceptions/exceptions.py | 11 ++++++++++ cognee/shared/utils.py | 29 +++++++++++++++----------- 3 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 cognee/shared/exceptions/__init__.py create mode 100644 cognee/shared/exceptions/exceptions.py diff --git a/cognee/shared/exceptions/__init__.py b/cognee/shared/exceptions/__init__.py new file mode 100644 index 000000000..9b86cccab --- /dev/null +++ b/cognee/shared/exceptions/__init__.py @@ -0,0 +1,9 @@ +""" +Custom exceptions for the Cognee API. + +This module defines a set of exceptions for handling various shared utility errors +""" + +from .exceptions import ( + IngestionError, +) \ No newline at end of file diff --git a/cognee/shared/exceptions/exceptions.py b/cognee/shared/exceptions/exceptions.py new file mode 100644 index 000000000..101711398 --- /dev/null +++ b/cognee/shared/exceptions/exceptions.py @@ -0,0 +1,11 @@ +from cognee.exceptions import CogneeApiError +from fastapi import status + +class IngestionError(CogneeApiError): + def __init__( + self, + message: str = "Failed to load data.", + name: str = "IngestionError", + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + ): + super().__init__(message, name, status_code) \ No newline at end of file diff --git a/cognee/shared/utils.py b/cognee/shared/utils.py index 1dc52acd5..b75076e55 100644 --- a/cognee/shared/utils.py +++ b/cognee/shared/utils.py @@ -19,6 +19,8 @@ from uuid import uuid4 import pathlib +from cognee.shared.exceptions import IngestionError + # Analytics Proxy Url, currently hosted by Vercel proxy_url = "https://test.prometh.ai" @@ -76,23 +78,26 @@ def num_tokens_from_string(string: str, encoding_name: str) 
-> int: def get_file_content_hash(file_obj: Union[str, BinaryIO]) -> str: h = hashlib.md5() - if isinstance(file_obj, str): - with open(file_obj, 'rb') as file: + try: + if isinstance(file_obj, str): + with open(file_obj, 'rb') as file: + while True: + # Reading is buffered, so we can read smaller chunks. + chunk = file.read(h.block_size) + if not chunk: + break + h.update(chunk) + else: while True: # Reading is buffered, so we can read smaller chunks. - chunk = file.read(h.block_size) + chunk = file_obj.read(h.block_size) if not chunk: break h.update(chunk) - else: - while True: - # Reading is buffered, so we can read smaller chunks. - chunk = file_obj.read(h.block_size) - if not chunk: - break - h.update(chunk) - - return h.hexdigest() + + return h.hexdigest() + except IOError as e: + raise IngestionError(message=f"Failed to load data from {file_obj}: {e}") from e def trim_text_to_max_tokens(text: str, max_tokens: int, encoding_name: str) -> str: """