Skip to content

Commit

Permalink
fix: truncate ByteStream string representation (#8673)
Browse files Browse the repository at this point in the history
* fix: truncate ByteStream string representation

* add reno

* better reno

* add test

* Update test_byte_stream.py

* apply feedback

* update reno
  • Loading branch information
tstadel authored Jan 7, 2025
1 parent 8e3f647 commit e6059e6
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
14 changes: 13 additions & 1 deletion haystack/dataclasses/byte_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from typing import Any, Dict, Optional


@dataclass
@dataclass(repr=False)
class ByteStream:
"""
Base data class representing a binary object in the Haystack API.
Expand Down Expand Up @@ -63,3 +63,15 @@ def to_string(self, encoding: str = "utf-8") -> str:
:raises: UnicodeDecodeError: If the ByteStream data cannot be decoded with the specified encoding.
"""
return self.data.decode(encoding)

def __repr__(self) -> str:
    """
    Return a string representation of the ByteStream, truncating the data to 100 bytes.

    Data longer than 100 bytes is cut to its first 100 bytes and suffixed with ``b"..."``;
    shorter data is shown unchanged. ``meta`` and ``mime_type`` are always shown in full.

    :returns: A string of the form ``ClassName(data=..., meta=..., mime_type=...)``.
    """
    # Single source of truth for the cut-off, used by both the length check and the slice.
    max_bytes = 100
    if len(self.data) > max_bytes:
        shown_data = self.data[:max_bytes] + b"..."
    else:
        shown_data = self.data
    # Use the runtime class name so subclasses are labelled with their own name.
    return f"{self.__class__.__name__}(data={shown_data!r}, meta={self.meta!r}, mime_type={self.mime_type!r})"
4 changes: 4 additions & 0 deletions releasenotes/notes/fix-bytestream-str-8dd6d5e9a87f6aa4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
ByteStream now truncates the data to 100 bytes in the string representation to avoid excessive log output.
9 changes: 9 additions & 0 deletions test/dataclasses/test_byte_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,12 @@ def test_to_file(tmp_path, request):
ByteStream(test_str.encode()).to_file(test_path)
with open(test_path, "rb") as fd:
assert fd.read().decode() == test_str


def test_str_truncation():
    """
    Check that the string form of a ByteStream built from a 1000-character payload
    stays under 200 characters while still showing the mime type and the meta key.
    """
    payload = "1234567890" * 100
    stream = ByteStream.from_string(payload, mime_type="text/plain", meta={"foo": "bar"})
    rendered = str(stream)
    # The payload alone is 1000 characters, so a short result means the data was cut.
    assert len(rendered) < 200
    assert "text/plain" in rendered
    assert "foo" in rendered

0 comments on commit e6059e6

Please sign in to comment.