From 2e4b6574832b7478d29d37842818da09a2cb71da Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 25 Feb 2023 06:21:45 +0100
Subject: [PATCH] ROB: Decode encoded values in get_fields (#1636)

Fixes #424
---
 pypdf/generic/_data_structures.py |  7 +++++++
 tests/test_workflows.py           | 12 ++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index bb2e028d2..5fc736d82 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1162,6 +1162,13 @@ def __init__(self, data: DictionaryObject) -> None:
                 self[NameObject(attr)] = data[attr]
             except KeyError:
                 pass
+        if isinstance(self.get("/V"), EncodedStreamObject):
+            d = cast(EncodedStreamObject, self[NameObject("/V")]).get_data()
+            if isinstance(d, bytes):
+                d = d.decode()
+            elif d is None:
+                d = ""
+            self[NameObject("/V")] = TextStringObject(d)
 
     # TABLE 8.69 Entries common to all field dictionaries
     @property
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 74406125b..73d9c9597 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -942,3 +942,15 @@ def test_extra_test_iss1541():
     with pytest.raises(PdfReadError) as exc:
         reader.pages[0].extract_text()
     assert exc.value.args[0] == "Unexpected end of stream"
+
+
+@pytest.mark.external
+def test_fields_returning_stream():
+    """
+    problem reported in #424
+    """
+    url = "https://github.com/mstamy2/PyPDF2/files/1948267/Simple.form.pdf"
+    name = "tst_iss424.pdf"
+    data = BytesIO(get_pdf_from_url(url, name=name))
+    reader = PdfReader(data, strict=False)
+    assert "BtchIssQATit_time" in reader.get_form_text_fields()["TimeStampData"]