Skip to content

Commit

Permalink
ROB: Decode encoded values in get_fields (#1636)
Browse files: browse the repository at this point in the history
Fixes #424
  • Loading branch information
pubpub-zz authored Feb 25, 2023
1 parent 215df56 commit 2e4b657
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 0 deletions.
7 changes: 7 additions & 0 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,13 @@ def __init__(self, data: DictionaryObject) -> None:
self[NameObject(attr)] = data[attr]
except KeyError:
pass
if isinstance(self.get("/V"), EncodedStreamObject):
d = cast(EncodedStreamObject, self[NameObject("/V")]).get_data()
if isinstance(d, bytes):
d = d.decode()
elif d is None:
d = ""
self[NameObject("/V")] = TextStringObject(d)

# TABLE 8.69 Entries common to all field dictionaries
@property
Expand Down
12 changes: 12 additions & 0 deletions tests/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,3 +942,15 @@ def test_extra_test_iss1541():
with pytest.raises(PdfReadError) as exc:
reader.pages[0].extract_text()
assert exc.value.args[0] == "Unexpected end of stream"


@pytest.mark.external
def test_fields_returning_stream():
    """
    Regression test for the problem reported in #424.

    Downloads the sample form PDF, opens it in non-strict mode and checks
    that the "TimeStampData" entry returned by ``get_form_text_fields()``
    contains the expected marker text.
    """
    pdf_bytes = get_pdf_from_url(
        "https://github.com/mstamy2/PyPDF2/files/1948267/Simple.form.pdf",
        name="tst_iss424.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes), strict=False)
    text_fields = reader.get_form_text_fields()
    assert "BtchIssQATit_time" in text_fields["TimeStampData"]

0 comments on commit 2e4b657

Please sign in to comment.