def reattach_fields(
    self, page: Optional[PageObject] = None
) -> List[DictionaryObject]:
    """
    Parse annotations within the page looking for orphan fields and
    reattach them into the Fields structure.

    An annotation is considered a field when its ``/Subtype`` is
    ``/Widget`` and it carries its own ``/FT`` entry. Such an annotation is
    appended to the ``/Fields`` array of the document ``/AcroForm`` unless
    its indirect reference is already listed there. The ``/AcroForm``
    dictionary and its ``/Fields`` array are created when missing.

    Args:
        page: page to analyze.
            If none is provided, all pages will be analyzed.

    Returns:
        list of reattached fields.
    """
    reattached: List[DictionaryObject] = []
    if page is None:
        # Document-wide mode: process every page and merge the results.
        for p in self.pages:
            reattached.extend(self.reattach_fields(p))
        return reattached

    # Fetch the AcroForm dictionary, creating it if the document has none.
    try:
        af = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
    except KeyError:
        af = DictionaryObject()
        self._root_object[NameObject(CatalogDictionary.ACRO_FORM)] = af
    # Same for the /Fields array inside the AcroForm.
    try:
        fields = cast(ArrayObject, af[InteractiveFormDictEntries.Fields])
    except KeyError:
        fields = ArrayObject()
        af[NameObject(InteractiveFormDictEntries.Fields)] = fields

    if "/Annots" not in page:
        return reattached

    annots = cast(ArrayObject, page["/Annots"])
    for idx, entry in enumerate(annots):
        is_indirect = isinstance(entry, IndirectObject)
        ano = cast(DictionaryObject, entry.get_object())
        if ano.get("/Subtype", "") != "/Widget" or "/FT" not in ano:
            continue
        # A direct (inline) annotation may not carry an ``indirect_reference``
        # attribute until it has been registered, so read it defensively
        # instead of risking an AttributeError.
        ref = getattr(ano, "indirect_reference", None)
        if ref is not None and ref in fields:
            continue  # already attached to the form
        if not is_indirect:
            # Register the inline dictionary as an indirect object and make
            # the page reference it, so that /Fields can point to it too.
            annots[idx] = self._add_object(ano)
        fields.append(ano.indirect_reference)
        reattached.append(ano)
    return reattached
@pytest.mark.enable_socket()
def test_reattach_fields():
    """
    Exercise PdfWriter.reattach_fields on the sample form from issue #2453.

    Two scenarios are covered: a writer filled page by page, and a writer
    cloned from the reader.
    """
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/14241368/ExampleForm.pdf",
        name="iss2453.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))

    # Scenario 1: pages added one at a time.
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    first_pass = writer.reattach_fields()
    assert len(first_pass) == 15
    second_pass = writer.reattach_fields()
    assert len(second_pass) == 0  # nothing to append anymore
    attached = writer._root_object["/AcroForm"]["/Fields"]
    assert len(attached) == 15

    # Scenario 2: writer cloned from the reader.
    writer = PdfWriter(clone_from=reader)
    cloned_pass = writer.reattach_fields()
    assert len(cloned_pass) == 7
    # A repeated call must leave the total number of fields unchanged.
    writer.reattach_fields()
    attached = writer._root_object["/AcroForm"]["/Fields"]
    assert len(attached) == 15