py-pdf · polyglot-jones · Aug 10, 2020 · Aug 11, 2020 · Aug 11, 2020 · Apr 6, 2022
diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py
@@ -351,7 +351,7 @@ def appendPagesFromReader(self, reader, after_page_append=None):
             # Trigger callback, pass writer page as parameter
             if callable(after_page_append): after_page_append(writer_page)
 
-    def updatePageFormFieldValues(self, page, fields):
+    def updatePageFormFieldValues(self, page, fields, read_only = False):
         '''
         Update the form field values for a given page from a fields dictionary.
         Copy field texts and values from fields to page.
@@ -360,15 +360,34 @@ def updatePageFormFieldValues(self, page, fields):
             and field data will be updated.
         :param fields: a Python dictionary of field names (/T) and text
             values (/V)
+
+        Credit for figuring out that it sometimes helps to set the field to
+        read-only: https://stackoverflow.com/users/8382028/viatech
         '''
         # Iterate through pages, update field values
         for j in range(0, len(page['/Annots'])):
             writer_annot = page['/Annots'][j].getObject()
             for field in fields:
                 if writer_annot.get('/T') == field:
-                    writer_annot.update({
-                        NameObject("/V"): TextStringObject(fields[field])
-                    })
+                    writer_annot.update({NameObject("/V"): TextStringObject(fields[field])})
+                    if read_only:
+                        writer_annot.update({NameObject("/Ff"): NumberObject(1)})
+
+    def have_viewer_render_fields(self):
+        """
+        Some PDF viewers need to be coaxed into rendering field values.
+        This does so by setting a `/NeedAppearances` attribute to True
+        (which adds to the processing time slightly).
+        Credit for figuring this out: https://stackoverflow.com/users/8382028/viatech
+        """
+        try:
+            catalog = self._root_object
+            if "/AcroForm" not in catalog:
+                self._root_object.update({NameObject("/AcroForm"): IndirectObject(len(self._objects), 0, self)})
+            need_appearances = NameObject("/NeedAppearances")
+            self._root_object["/AcroForm"][need_appearances] = BooleanObject(True)
+        except Exception as e:
+            warnings.warn("Unable to set the /NeedAppearances flag. Filled-in field values may not render correctly. [{}]".format(repr(e)))
 
     def cloneReaderDocumentRoot(self, reader):
         '''
@@ -1346,10 +1365,11 @@ def getFormTextFields(self):
         '''
         # Retrieve document form fields
         formfields = self.getFields()
-        return dict(
-            (formfields[field]['/T'], formfields[field].get('/V')) for field in formfields \
-                if formfields[field].get('/FT') == '/Tx'
-        )
+        if formfields:
+            return dict(
+                (formfields[field]['/T'], formfields[field].get('/V')) for field in formfields \
+                    if formfields[field].get('/FT') == '/Tx')
+        return None
 
     def getNamedDestinations(self, tree=None, retval=None):
         """

diff --git a/Sample_Code/README.md b/Sample_Code/README.md
@@ -0,0 +1,38 @@
+# PyPDF2 Sample Code Folder
+
+This folder contains demonstrations of just a few of PyPDF2's many features.
+
+
+## `basic_features.py`
+
+Sample code that demonstrates:
+
+* Getting metadata from a PDF.
+* Copying a PDF, one page at a time, and performing different operations on each page (resize, rotate, add a watermark).
+* Encrypting a PDF.
+* Adding javascript that runs when the PDF is opened.
+
+
+## `basic_merging.py`
+
+Sample code that demonstrates merging together three PDFs into one, picking and choosing which pages appear in which order.
+Selected pages can be added to the end of the output PDF being built, or inserted in the middle.
+
+
+## `make_simple.py`
+
+Sample code to make a few simple PDF files of various page counts.
+
+
+## `make_simple.sh`
+
+An example shell script that does the exact same thing as `make_simple.py`,
+but by using the `ps2pdf` script on the command line.
+
+
+To contribute more...
+
+Feel free to add any type of PDF file or sample code, either by
+
+	1) sending it via email to [email protected]
+	2) including it in a pull request on GitHub
diff --git a/Sample_Code/README.txt b/Sample_Code/README.txt
diff --git a/Sample_Code/basic_features.py b/Sample_Code/basic_features.py
@@ -1,3 +1,12 @@
+"""
+Sample code that demonstrates:
+
+* Getting metadata from a PDF.
+* Copying a PDF, one page at a time, and performing different operations on each page (resize, rotate, add a watermark).
+* Encrypting a PDF.
+* Adding javascript that runs when the PDF is opened.
+"""
+
 from PyPDF2 import PdfFileWriter, PdfFileReader
 
 output = PdfFileWriter()

diff --git a/Sample_Code/basic_merging.py b/Sample_Code/basic_merging.py
@@ -1,3 +1,8 @@
+"""
+Sample code that demonstrates merging together three PDFs into one, picking and choosing which pages appear in which order.
+Selected pages can be added to the end of the output PDF being built, or inserted in the middle.
+"""
+
 from PyPDF2 import PdfFileMerger
 
 merger = PdfFileMerger()

diff --git a/Sample_Code/fillable_fields.py b/Sample_Code/fillable_fields.py
@@ -0,0 +1,63 @@
+"""
+Sample code that copies a PDF, changing field values along the way (i.e. using a PDF with fillable fields as a template).
+"""
+import sys
+from PyPDF2 import PdfFileWriter, PdfFileReader
+
+root_folder = "Sample_Code/"
+template_name = "fillable_form.pdf"
+
+
+def discover_fields(template_pdf, just_text=True):
+    if just_text:
+        available_fields = template_pdf.getFormTextFields()
+    else:
+        available_fields = template_pdf.getFields()
+    if available_fields:
+        for fieldname in available_fields:
+            print(fieldname)
+    else:
+        print("ERROR: '" + template_name + "' has no text fields.")
+        sys.exit(1)
+
+def fill_in_pdf(template_pdf, field_values, filename):
+    output = PdfFileWriter()
+    output.have_viewer_render_fields()
+    for page_no in range(template_pdf.getNumPages()):
+        template_page = template_pdf.getPage(0)
+        output.addPage(template_page)
+        page = output.getPage(page_no)
+        output.updatePageFormFieldValues(page, field_values, read_only=True)
+    output_stream = open(filename, "wb")
+    output.write(output_stream)
+    output_stream.close()
+
+template_pdf = PdfFileReader(open(root_folder + template_name, "rb"), strict=False)
+
+employee_john = {
+    "employee_name": "John Hardworker",
+    "employee_id": "0123",
+    "department": "Human Resources",
+    "manager_name": "Doris Stickler",
+    "manager_id": "0072"
+}
+employee_cyndi = {
+    "employee_name": "Cyndi Smartworker",
+    "employee_id": "0199",
+    "department": "Engineering",
+    "manager_name": "Steven Wright",
+    "manager_id": "0051"
+}
+
+
+discover_fields(template_pdf)
+
+fill_in_pdf(template_pdf, employee_john, root_folder + "JohnHardworder.pdf")
+
+# fill_in_pdf(template_pdf, employee_cyndi, root_folder + "CyndiSmartworker.pdf")
+# FIXME: If you uncomment this second call, you get:
+#   File "C:\forks\PyPDF2\PyPDF2\pdf.py", line 594, in _sweepIndirectReferences
+#   if data.pdf.stream.closed:
+#   AttributeError: 'PdfFileWriter' object has no attribute 'stream'
+
+
diff --git a/Sample_Code/fillable_form.odt b/Sample_Code/fillable_form.odt
diff --git a/Sample_Code/fillable_form.pdf b/Sample_Code/fillable_form.pdf
diff --git a/Sample_Code/make_simple.py b/Sample_Code/make_simple.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+"""Sample code to make a few simple pdf files of various page counts."""
+
+from __future__ import print_function
+from sys import argv
+
+from reportlab.pdfgen import canvas
+
+POINT = 1
+INCH = 72
+
+TEXT = """%s    page %d of %d
+
+a wonderful file
+created with Sample_Code/makesimple.py"""
+
+
+def make_pdf_file(output_filename, page_count):
+    title = output_filename
+    c = canvas.Canvas(output_filename, pagesize=(8.5 * INCH, 11 * INCH))
+    c.setStrokeColorRGB(0,0,0)
+    c.setFillColorRGB(0,0,0)
+    c.setFont("Helvetica", 12 * POINT)
+    for page_no in range(1, page_count + 1):
+        # The x-axis is at the bottom of the page, so 10 * INCH is 1 inch from the top.
+        vertical_pos = 10 * INCH
+        left_margin = 1 * INCH
+        for text_line in (TEXT % (output_filename, page_no, page_count)).split( '\n' ):
+            c.drawString(left_margin, vertical_pos, text_line)
+            vertical_pos -= 12 * POINT
+        c.showPage()
+    c.save()
+
+if __name__ == "__main__":
+    target_page_counts = [None, 5, 11, 17]
+    for i, target_page_count in enumerate(target_page_counts):
+        if target_page_count:
+            filename = "simple%d.pdf" % i
+            make_pdf_file(filename, target_page_count)
+            print ("Wrote", filename)
diff --git a/Sample_Code/make_simple.sh b/Sample_Code/make_simple.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+# An example shell script that demonstrates using `ps2pdf` on the command line
+sample=1
+for pagecount in 5 11 17; do
+   page=1
+   f=simple$sample.pdf
+   while expr $page \<= $pagecount > /dev/null; do
+     if [ $page != 1 ]; then
+       echo "\c"
+      fi
+     echo "$f           page $page of $pagecount"
+     echo ""
+     echo "an incredible, yet simple example"
+     echo "Created with Sample_Code/makesimple.sh"
+     page=$(expr $page + 1)
+    done | enscript --no-header -o - |ps2pdf - $f
+   echo $f
+   sample=$(expr $sample + 1)
+ done
diff --git a/Sample_Code/makesimple.py b/Sample_Code/makesimple.py
diff --git a/Sample_Code/makesimple.sh b/Sample_Code/makesimple.sh