Skip to content

Commit

Permalink
NEW: add reattach_fields function
Browse files Browse the repository at this point in the history
parse page/document annotations for orphan fields and reattach them to AcroForm/Fields
closes py-pdf#2453
  • Loading branch information
pubpub-zz committed Feb 27, 2024
1 parent 178014e commit c8ba914
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 0 deletions.
46 changes: 46 additions & 0 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,52 @@ def update_page_form_field_values(
value if value in k[AA.AP]["/N"] else "/Off"
)

def reattach_fields(
    self, page: Optional[PageObject] = None
) -> List[DictionaryObject]:
    """
    Parse annotations within the page looking for orphan fields and
    reattach them into the Fields structure.

    An annotation is treated as a field when its ``/Subtype`` is
    ``/Widget`` and it carries an ``/FT`` entry. Such an annotation is
    appended to the ``/Fields`` array of the document ``/AcroForm``
    unless its indirect reference is already listed there. The
    ``/AcroForm`` dictionary and its ``/Fields`` array are created when
    they are missing.

    Args:
        page: page to analyze.
            If none is provided, all pages will be analyzed.

    Returns:
        list of reattached fields.
    """
    lst: List[DictionaryObject] = []
    if page is None:
        # No page given: process every page and merge the results.
        for p in self.pages:
            lst += self.reattach_fields(p)
        return lst

    # Get the AcroForm dictionary, creating it when missing.
    try:
        af = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
    except KeyError:
        af = DictionaryObject()
        self._root_object[NameObject(CatalogDictionary.ACRO_FORM)] = af
    # Get the Fields array, creating it when missing.
    try:
        fields = cast(ArrayObject, af[InteractiveFormDictEntries.Fields])
    except KeyError:
        fields = ArrayObject()
        af[NameObject(InteractiveFormDictEntries.Fields)] = fields

    if "/Annots" not in page:
        return lst
    annots = cast(ArrayObject, page["/Annots"])
    # Replacing annots[idx] below keeps the array length unchanged, so it
    # is safe to do while enumerating.
    for idx, entry in enumerate(annots):
        indirect = isinstance(entry, IndirectObject)
        ano = cast(DictionaryObject, entry.get_object())
        if ano.get("/Subtype", "") != "/Widget" or "/FT" not in ano:
            continue
        # A direct (inline) annotation may not have an indirect reference
        # yet, so read the attribute defensively instead of risking an
        # AttributeError.
        ref = getattr(ano, "indirect_reference", None)
        if ref is not None and ref in fields:
            continue  # already attached
        if not indirect:
            # Register the inline annotation so that it can be referenced
            # from both /Annots and /Fields.
            # NOTE(review): relies on _add_object setting
            # ano.indirect_reference - confirm against its implementation.
            annots[idx] = self._add_object(ano)
        fields.append(ano.indirect_reference)
        lst.append(ano)
    return lst

def clone_reader_document_root(self, reader: PdfReader) -> None:
"""
Copy the reader document root to the writer and all sub elements,
Expand Down
21 changes: 21 additions & 0 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1978,3 +1978,24 @@ def create_number_pdf(n) -> BytesIO:
for n, page in enumerate(reader.pages):
text = page.extract_text()
assert text == str(n)


@pytest.mark.enable_socket()
def test_reattach_fields():
    """
    Check PdfWriter.reattach_fields on the sample form from issue #2453.

    Two writers are built from the same reader: one by adding the pages
    one by one, the other with ``clone_from``. In both cases the
    ``/AcroForm`` ``/Fields`` array is expected to hold 15 entries once
    the fields have been reattached.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/14241368/ExampleForm.pdf",
        name="iss2453.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # Writer populated page by page: all 15 fields are expected to be
    # reported as reattached by the first call.
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    first_pass = writer.reattach_fields()
    assert len(first_pass) == 15
    second_pass = writer.reattach_fields()
    assert len(second_pass) == 0  # nothing to append anymore
    assert len(writer._root_object["/AcroForm"]["/Fields"]) == 15

    # Writer cloned from the reader: 7 fields are expected to be
    # reattached, for a total of 15 entries in /Fields.
    writer = PdfWriter(clone_from=reader)
    reattached = writer.reattach_fields()
    assert len(reattached) == 7
    writer.reattach_fields()
    acro_form = writer._root_object["/AcroForm"]
    assert len(acro_form["/Fields"]) == 15

0 comments on commit c8ba914

Please sign in to comment.