diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py index 9cd1e29b..ae478885 100644 --- a/docling/backend/html_backend.py +++ b/docling/backend/html_backend.py @@ -37,10 +37,10 @@ def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path] try: if isinstance(self.path_or_stream, BytesIO): - text_stream = self.path_or_stream.getvalue().decode("utf-8") + text_stream = self.path_or_stream.getvalue() self.soup = BeautifulSoup(text_stream, "html.parser") if isinstance(self.path_or_stream, Path): - with open(self.path_or_stream, "r", encoding="utf-8") as f: + with open(self.path_or_stream, "rb") as f: html_content = f.read() self.soup = BeautifulSoup(html_content, "html.parser") except Exception as e: