From 2aea3e97b89c4c7f4845b888a2dc1fc647db7576 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 13 Jun 2022 21:09:51 +0200
Subject: [PATCH] fix  'utf-16-be' codec can't decode bytes in position 0-1:
 unexpected end of data

use surrogatepass  in _cmap and _page
---
 PyPDF2/__init__.py      |    2 +-
 PyPDF2/_cmap.py         |  604 ++++----
 PyPDF2/_page.py         | 2945 +++++++++++++++++++--------------------
 PyPDF2/_utils.py        |  662 ++++-----
 tests/test_page.py      |    1 +
 tests/test_reader.py    |    1 +
 tests/test_workflows.py |   13 +-
 7 files changed, 2123 insertions(+), 2105 deletions(-)

diff --git a/PyPDF2/__init__.py b/PyPDF2/__init__.py
index d9759231d..09076ba8d 100644
--- a/PyPDF2/__init__.py
+++ b/PyPDF2/__init__.py
@@ -1,5 +1,5 @@
 from ._merger import PdfFileMerger, PdfMerger
-from ._page import Transformation, PageObject
+from ._page import PageObject, Transformation
 from ._reader import DocumentInformation, PdfFileReader, PdfReader
 from ._version import __version__
 from ._writer import PdfFileWriter, PdfWriter
diff --git a/PyPDF2/_cmap.py b/PyPDF2/_cmap.py
index 32b9fcc41..bb9c477a1 100644
--- a/PyPDF2/_cmap.py
+++ b/PyPDF2/_cmap.py
@@ -1,294 +1,310 @@
-import warnings
-from binascii import unhexlify
-from typing import Any, Dict, List, Tuple, Union, cast
-
-from ._adobe_glyphs import adobe_glyphs
-from .errors import PdfReadWarning
-from .generic import DecodedStreamObject, DictionaryObject, charset_encoding
-
-
-# code freely inspired from @twiggy ; see #711
-def build_char_map(
-    font_name: str, space_width: float, obj: DictionaryObject
-) -> Tuple[
-    str, float, Union[str, Dict[int, str]], Dict
-]:  # font_type,space_width /2, encoding, cmap
-    ft: DictionaryObject = obj["/Resources"]["/Font"][font_name]  # type: ignore
-    font_type: str = cast(str, ft["/Subtype"])
-
-    space_code = 32
-    encoding, space_code = parse_encoding(ft, space_code)
-    map_dict, space_code, int_entry = parse_to_unicode(ft, space_code)
-
-    # encoding can be either a string for decode (on 1,2 or a variable number of bytes) of a char table (for 1 byte only for me)
-    # if empty string, it means it is than encoding field is not present and we have to select the good encoding from cmap input data
-    if encoding == "":
-        if -1 not in map_dict or map_dict[-1] == 1:
-            # I have not been able to find any rule for no /Encoding nor /ToUnicode
-            # One example shows /Symbol,bold I consider 8 bits encoding default
-            encoding = "charmap"
-        else:
-            encoding = "utf-16-be"
-    # apply rule from PDF ref 1.7 §5.9.1, 1st bullet : if cmap not empty encoding should be discarded (here transformed into identity for those characters)
-    # if encoding is an str it is expected to be a identity translation
-    elif isinstance(encoding, dict):
-        for x in int_entry:
-            if x <= 255:
-                encoding[x] = chr(x)
-    if font_name in _default_fonts_space_width:
-        # override space_width with new params
-        space_width = _default_fonts_space_width[font_name]
-    sp_width = compute_space_width(ft, space_code, space_width)
-
-    return (
-        font_type,
-        float(sp_width / 2),
-        encoding,
-        # https://github.com/python/mypy/issues/4374
-        map_dict,  # type: ignore
-    )  # type: ignore
-
-
-# used when missing data, e.g. font def missing
-unknown_char_map : Tuple[str, float, Union[str, Dict[int, str]], Dict] = (
-    "Unknown", 9999, dict(zip(range(256), ["�"] * 256)), {}
-)
-
-
-_predefined_cmap: Dict[str, str] = {
-    "/Identity-H": "utf-16-be",
-    "/Identity-V": "utf-16-be",
-    "/GB-EUC-H": "gbk",  # TBC
-    "/GB-EUC-V": "gbk",  # TBC
-    "/GBpc-EUC-H": "gb2312",  # TBC
-    "/GBpc-EUC-V": "gb2312",  # TBC
-}
-
-
-# manually extracted from http://mirrors.ctan.org/fonts/adobe/afm/Adobe-Core35_AFMs-229.tar.gz
-_default_fonts_space_width: Dict[str, int] = {
-    "/Courrier": 600,
-    "/Courier-Bold": 600,
-    "/Courier-BoldOblique": 600,
-    "/Courier-Oblique": 600,
-    "/Helvetica": 278,
-    "/Helvetica-Bold": 278,
-    "/Helvetica-BoldOblique": 278,
-    "/Helvetica-Oblique": 278,
-    "/Helvetica-Narrow": 228,
-    "/Helvetica-NarrowBold": 228,
-    "/Helvetica-NarrowBoldOblique": 228,
-    "/Helvetica-NarrowOblique": 228,
-    "/Times-Roman": 250,
-    "/Times-Bold": 250,
-    "/Times-BoldItalic": 250,
-    "/Times-Italic": 250,
-    "/Symbol": 250,
-    "/ZapfDingbats": 278,
-}
-
-
-def parse_encoding(
-    ft: DictionaryObject, space_code: int
-) -> Tuple[Union[str, Dict[int, str]], int]:
-    encoding: Union[str, List[str], Dict[int, str]] = []
-    if "/Encoding" not in ft:
-        try:
-            if "/BaseFont" in ft and ft["/BaseFont"] in charset_encoding:
-                encoding = dict(zip(range(256), charset_encoding[cast(str, ft["/BaseFont"])]))
-            else:
-                encoding = "charmap"
-            return encoding, _default_fonts_space_width[cast(str, ft["/BaseFont"])]
-        except Exception:
-            if ft["/Subtype"] == "/Type1":
-                return "charmap", space_code
-            else:
-                return "", space_code
-    enc: Union(str, DictionaryObject) = ft["/Encoding"].get_object()  # type: ignore
-    if isinstance(enc, str):
-        try:
-            if enc in charset_encoding:
-                encoding = charset_encoding[enc].copy()
-            elif enc in _predefined_cmap:
-                encoding = _predefined_cmap[enc]
-            else:
-                raise Exception("not found")
-        except Exception:
-            warnings.warn(
-                f"Advanced encoding {enc} not implemented yet",
-                PdfReadWarning,
-            )
-            encoding = enc
-    elif isinstance(enc, DictionaryObject) and "/BaseEncoding" in enc:
-        try:
-            encoding = charset_encoding[cast(str, enc["/BaseEncoding"])].copy()
-        except Exception:
-            warnings.warn(
-                f"Advanced encoding {encoding} not implemented yet",
-                PdfReadWarning,
-            )
-            encoding = charset_encoding["/StandardCoding"].copy()
-    else:
-        encoding = charset_encoding["/StandardCoding"].copy()
-    if "/Differences" in enc:
-        x: int = 0
-        o: Union[int, str]
-        for o in cast(DictionaryObject, cast(DictionaryObject, enc)["/Differences"]):
-            if isinstance(o, int):
-                x = o
-            else:  # isinstance(o,str):
-                try:
-                    encoding[x] = adobe_glyphs[o]  # type: ignore
-                except Exception:
-                    encoding[x] = o  # type: ignore
-                    if o == " ":
-                        space_code = x
-                x += 1
-    if isinstance(encoding, list):
-        encoding = dict(zip(range(256), encoding))
-    return encoding, space_code
-
-
-def parse_to_unicode(ft: DictionaryObject, space_code: int) -> Tuple[Dict, int, List[int]]:
-    map_dict: Dict[
-        Any, Any
-    ] = (
-        {}
-    )  # will store all translation code and map_dict[-1] we will have the number of bytes to convert
-    int_entry : List[int] = []  # will provide the list of cmap keys as int to correct encoding
-    if "/ToUnicode" not in ft:
-        return {}, space_code, []
-    process_rg: bool = False
-    process_char: bool = False
-    cm: bytes = cast(DecodedStreamObject, ft["/ToUnicode"]).get_data()
-    # we need to prepare cm before due to missing return line in pdf printed to pdf from word
-    cm = (
-        cm.strip()
-        .replace(b"beginbfchar", b"\nbeginbfchar\n")
-        .replace(b"endbfchar", b"\nendbfchar\n")
-        .replace(b"beginbfrange", b"\nbeginbfrange\n")
-        .replace(b"endbfrange", b"\nendbfrange\n")
-        .replace(b"<<", b"\n{\n")  # text between << and >> not used but
-        .replace(b">>", b"\n}\n")  # some solution to find it back
-    )
-    ll = cm.split(b"<")
-    for i in range(len(ll)):
-        j = ll[i].find(b">")
-        if j >= 0:
-            ll[i] = ll[i][:j].replace(b" ", b"") + b" " + ll[i][j + 1 :]
-    cm = (b" ".join(ll)).replace(b"[", b" [ ").replace(b"]", b" ]\n ").replace(b"\r", b"\n")
-
-    for l in cm.split(b"\n"):
-        if l in (b"", b" "):
-            continue
-        if b"beginbfrange" in l:
-            process_rg = True
-        elif b"endbfrange" in l:
-            process_rg = False
-        elif b"beginbfchar" in l:
-            process_char = True
-        elif b"endbfchar" in l:
-            process_char = False
-        elif process_rg:
-            lst = [x for x in l.split(b" ") if x]
-            a = int(lst[0], 16)
-            b = int(lst[1], 16)
-            nbi = len(lst[0])
-            map_dict[-1] = nbi // 2
-            fmt = b"%%0%dX" % nbi
-            if lst[2] == b"[":
-                for sq in lst[3:]:
-                    if sq == b"]":
-                        break
-                    map_dict[
-                        unhexlify(fmt % a).decode(
-                            "charmap" if map_dict[-1] == 1 else "utf-16-be"
-                        )
-                    ] = unhexlify(sq).decode("utf-16-be")
-                    int_entry.append(a)
-                    a += 1
-                    assert a > b
-            else:
-                c = int(lst[2], 16)
-                fmt2 = b"%%0%dX" % len(lst[2])
-                while a <= b:
-                    map_dict[
-                        unhexlify(fmt % a).decode(
-                            "charmap" if map_dict[-1] == 1 else "utf-16-be"
-                        )
-                    ] = unhexlify(fmt2 % c).decode("utf-16-be")
-                    int_entry.append(a)
-                    a += 1
-                    c += 1
-        elif process_char:
-            lst = [x for x in l.split(b" ") if x]
-            map_dict[-1] = len(lst[0]) // 2
-            while len(lst) > 0:
-                map_dict[
-                    unhexlify(lst[0]).decode(
-                        "charmap" if map_dict[-1] == 1 else "utf-16-be"
-                    )
-                ] = unhexlify(lst[1]).decode(
-                    "utf-16-be"
-                )  # join is here as some cases where the code was split
-                int_entry.append(int(lst[0], 16))
-                lst = lst[2:]
-    for a in map_dict:
-        if map_dict[a] == " ":
-            space_code = a
-    return map_dict, space_code, int_entry
-
-
-def compute_space_width(
-    ft: DictionaryObject, space_code: int, space_width: float
-) -> float:
-    sp_width: float = space_width * 2  # default value
-    w = []
-    st: int = 0
-    if "/W" in ft:
-        if "/DW" in ft:
-            sp_width = cast(float, ft["/DW"])
-        w = list(ft["/W"])  # type: ignore
-        while len(w) > 0:
-            st = w[0]
-            second = w[1]
-            if isinstance(int, second):
-                if st <= space_code and space_code <= second:
-                    sp_width = w[2]
-                    break
-                w = w[3:]
-            if isinstance(list, second):
-                if st <= space_code and space_code <= st + len(second) - 1:
-                    sp_width = second[space_code - st]
-                w = w[2:]
-            else:
-                warnings.warn(
-                    "unknown widths : \n" + (ft["/W"]).__repr__(),
-                    PdfReadWarning,
-                )
-                break
-    if "/Widths" in ft:
-        w = list(ft["/Widths"])  # type: ignore
-        try:
-            st = cast(int, ft["/FirstChar"])
-            en: int = cast(int, ft["/LastChar"])
-            if st > space_code or en < space_code:
-                raise Exception("Not in range")
-            if w[space_code - st] == 0:
-                raise Exception("null width")
-            sp_width = w[space_code - st]
-        except Exception:
-            if "/FontDescriptor" in ft and "/MissingWidth" in cast(
-                DictionaryObject, ft["/FontDescriptor"]
-            ):
-                sp_width = ft["/FontDescriptor"]["/MissingWidth"]  # type: ignore
-            else:
-                # will consider width of char as avg(width)/2
-                m = 0
-                cpt = 0
-                for x in w:
-                    if x > 0:
-                        m += x
-                        cpt += 1
-                sp_width = m / max(1, cpt) / 2
-    return sp_width
+import warnings
+from binascii import unhexlify
+from typing import Any, Dict, List, Tuple, Union, cast
+
+from ._adobe_glyphs import adobe_glyphs
+from .errors import PdfReadWarning
+from .generic import DecodedStreamObject, DictionaryObject, charset_encoding
+
+
+# code freely inspired from @twiggy ; see #711
+def build_char_map(
+    font_name: str, space_width: float, obj: DictionaryObject
+) -> Tuple[
+    str, float, Union[str, Dict[int, str]], Dict
+]:  # font_type,space_width /2, encoding, cmap
+    ft: DictionaryObject = obj["/Resources"]["/Font"][font_name]  # type: ignore
+    font_type: str = cast(str, ft["/Subtype"])
+
+    space_code = 32
+    encoding, space_code = parse_encoding(ft, space_code)
+    map_dict, space_code, int_entry = parse_to_unicode(ft, space_code)
+
+    # encoding can be either a string for decode (on 1,2 or a variable number of bytes) of a char table (for 1 byte only for me)
+    # if empty string, it means it is than encoding field is not present and we have to select the good encoding from cmap input data
+    if encoding == "":
+        if -1 not in map_dict or map_dict[-1] == 1:
+            # I have not been able to find any rule for no /Encoding nor /ToUnicode
+            # One example shows /Symbol,bold I consider 8 bits encoding default
+            encoding = "charmap"
+        else:
+            encoding = "utf-16-be"
+    # apply rule from PDF ref 1.7 §5.9.1, 1st bullet : if cmap not empty encoding should be discarded (here transformed into identity for those characters)
+    # if encoding is an str it is expected to be a identity translation
+    elif isinstance(encoding, dict):
+        for x in int_entry:
+            if x <= 255:
+                encoding[x] = chr(x)
+    if font_name in _default_fonts_space_width:
+        # override space_width with new params
+        space_width = _default_fonts_space_width[font_name]
+    sp_width = compute_space_width(ft, space_code, space_width)
+
+    return (
+        font_type,
+        float(sp_width / 2),
+        encoding,
+        # https://github.com/python/mypy/issues/4374
+        map_dict,  # type: ignore
+    )  # type: ignore
+
+
+# used when missing data, e.g. font def missing
+unknown_char_map: Tuple[str, float, Union[str, Dict[int, str]], Dict] = (
+    "Unknown",
+    9999,
+    dict(zip(range(256), ["�"] * 256)),
+    {},
+)
+
+
+_predefined_cmap: Dict[str, str] = {
+    "/Identity-H": "utf-16-be",
+    "/Identity-V": "utf-16-be",
+    "/GB-EUC-H": "gbk",  # TBC
+    "/GB-EUC-V": "gbk",  # TBC
+    "/GBpc-EUC-H": "gb2312",  # TBC
+    "/GBpc-EUC-V": "gb2312",  # TBC
+}
+
+
+# manually extracted from http://mirrors.ctan.org/fonts/adobe/afm/Adobe-Core35_AFMs-229.tar.gz
+_default_fonts_space_width: Dict[str, int] = {
+    "/Courrier": 600,
+    "/Courier-Bold": 600,
+    "/Courier-BoldOblique": 600,
+    "/Courier-Oblique": 600,
+    "/Helvetica": 278,
+    "/Helvetica-Bold": 278,
+    "/Helvetica-BoldOblique": 278,
+    "/Helvetica-Oblique": 278,
+    "/Helvetica-Narrow": 228,
+    "/Helvetica-NarrowBold": 228,
+    "/Helvetica-NarrowBoldOblique": 228,
+    "/Helvetica-NarrowOblique": 228,
+    "/Times-Roman": 250,
+    "/Times-Bold": 250,
+    "/Times-BoldItalic": 250,
+    "/Times-Italic": 250,
+    "/Symbol": 250,
+    "/ZapfDingbats": 278,
+}
+
+
+def parse_encoding(
+    ft: DictionaryObject, space_code: int
+) -> Tuple[Union[str, Dict[int, str]], int]:
+    encoding: Union[str, List[str], Dict[int, str]] = []
+    if "/Encoding" not in ft:
+        try:
+            if "/BaseFont" in ft and ft["/BaseFont"] in charset_encoding:
+                encoding = dict(
+                    zip(range(256), charset_encoding[cast(str, ft["/BaseFont"])])
+                )
+            else:
+                encoding = "charmap"
+            return encoding, _default_fonts_space_width[cast(str, ft["/BaseFont"])]
+        except Exception:
+            if ft["/Subtype"] == "/Type1":
+                return "charmap", space_code
+            else:
+                return "", space_code
+    enc: Union(str, DictionaryObject) = ft["/Encoding"].get_object()  # type: ignore
+    if isinstance(enc, str):
+        try:
+            if enc in charset_encoding:
+                encoding = charset_encoding[enc].copy()
+            elif enc in _predefined_cmap:
+                encoding = _predefined_cmap[enc]
+            else:
+                raise Exception("not found")
+        except Exception:
+            warnings.warn(
+                f"Advanced encoding {enc} not implemented yet",
+                PdfReadWarning,
+            )
+            encoding = enc
+    elif isinstance(enc, DictionaryObject) and "/BaseEncoding" in enc:
+        try:
+            encoding = charset_encoding[cast(str, enc["/BaseEncoding"])].copy()
+        except Exception:
+            warnings.warn(
+                f"Advanced encoding {encoding} not implemented yet",
+                PdfReadWarning,
+            )
+            encoding = charset_encoding["/StandardCoding"].copy()
+    else:
+        encoding = charset_encoding["/StandardCoding"].copy()
+    if "/Differences" in enc:
+        x: int = 0
+        o: Union[int, str]
+        for o in cast(DictionaryObject, cast(DictionaryObject, enc)["/Differences"]):
+            if isinstance(o, int):
+                x = o
+            else:  # isinstance(o,str):
+                try:
+                    encoding[x] = adobe_glyphs[o]  # type: ignore
+                except Exception:
+                    encoding[x] = o  # type: ignore
+                    if o == " ":
+                        space_code = x
+                x += 1
+    if isinstance(encoding, list):
+        encoding = dict(zip(range(256), encoding))
+    return encoding, space_code
+
+
+def parse_to_unicode(
+    ft: DictionaryObject, space_code: int
+) -> Tuple[Dict, int, List[int]]:
+    map_dict: Dict[
+        Any, Any
+    ] = (
+        {}
+    )  # will store all translation code and map_dict[-1] we will have the number of bytes to convert
+    int_entry: List[
+        int
+    ] = []  # will provide the list of cmap keys as int to correct encoding
+    if "/ToUnicode" not in ft:
+        return {}, space_code, []
+    process_rg: bool = False
+    process_char: bool = False
+    cm: bytes = cast(DecodedStreamObject, ft["/ToUnicode"]).get_data()
+    # we need to prepare cm before due to missing return line in pdf printed to pdf from word
+    cm = (
+        cm.strip()
+        .replace(b"beginbfchar", b"\nbeginbfchar\n")
+        .replace(b"endbfchar", b"\nendbfchar\n")
+        .replace(b"beginbfrange", b"\nbeginbfrange\n")
+        .replace(b"endbfrange", b"\nendbfrange\n")
+        .replace(b"<<", b"\n{\n")  # text between << and >> not used but
+        .replace(b">>", b"\n}\n")  # some solution to find it back
+    )
+    ll = cm.split(b"<")
+    for i in range(len(ll)):
+        j = ll[i].find(b">")
+        if j >= 0:
+            ll[i] = ll[i][:j].replace(b" ", b"") + b" " + ll[i][j + 1 :]
+    cm = (
+        (b" ".join(ll))
+        .replace(b"[", b" [ ")
+        .replace(b"]", b" ]\n ")
+        .replace(b"\r", b"\n")
+    )
+
+    for l in cm.split(b"\n"):
+        if l in (b"", b" "):
+            continue
+        if b"beginbfrange" in l:
+            process_rg = True
+        elif b"endbfrange" in l:
+            process_rg = False
+        elif b"beginbfchar" in l:
+            process_char = True
+        elif b"endbfchar" in l:
+            process_char = False
+        elif process_rg:
+            lst = [x for x in l.split(b" ") if x]
+            a = int(lst[0], 16)
+            b = int(lst[1], 16)
+            nbi = len(lst[0])
+            map_dict[-1] = nbi // 2
+            fmt = b"%%0%dX" % nbi
+            if lst[2] == b"[":
+                for sq in lst[3:]:
+                    if sq == b"]":
+                        break
+                    map_dict[
+                        unhexlify(fmt % a).decode(
+                            "charmap" if map_dict[-1] == 1 else "utf-16-be",
+                            "surrogatepass",
+                        )
+                    ] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
+                    int_entry.append(a)
+                    a += 1
+                    assert a > b
+            else:
+                c = int(lst[2], 16)
+                fmt2 = b"%%0%dX" % len(lst[2])
+                while a <= b:
+                    map_dict[
+                        unhexlify(fmt % a).decode(
+                            "charmap" if map_dict[-1] == 1 else "utf-16-be",
+                            "surrogatepass",
+                        )
+                    ] = unhexlify(fmt2 % c).decode("utf-16-be", "surrogatepass")
+                    int_entry.append(a)
+                    a += 1
+                    c += 1
+        elif process_char:
+            lst = [x for x in l.split(b" ") if x]
+            map_dict[-1] = len(lst[0]) // 2
+            while len(lst) > 0:
+                map_dict[
+                    unhexlify(lst[0]).decode(
+                        "charmap" if map_dict[-1] == 1 else "utf-16-be", "surrogatepass"
+                    )
+                ] = unhexlify(lst[1]).decode(
+                    "utf-16-be", "surrogatepass"
+                )  # join is here as some cases where the code was split
+                int_entry.append(int(lst[0], 16))
+                lst = lst[2:]
+    for a in map_dict:
+        if map_dict[a] == " ":
+            space_code = a
+    return map_dict, space_code, int_entry
+
+
+def compute_space_width(
+    ft: DictionaryObject, space_code: int, space_width: float
+) -> float:
+    sp_width: float = space_width * 2  # default value
+    w = []
+    st: int = 0
+    if "/W" in ft:
+        if "/DW" in ft:
+            sp_width = cast(float, ft["/DW"])
+        w = list(ft["/W"])  # type: ignore
+        while len(w) > 0:
+            st = w[0]
+            second = w[1]
+            if isinstance(int, second):
+                if st <= space_code and space_code <= second:
+                    sp_width = w[2]
+                    break
+                w = w[3:]
+            if isinstance(list, second):
+                if st <= space_code and space_code <= st + len(second) - 1:
+                    sp_width = second[space_code - st]
+                w = w[2:]
+            else:
+                warnings.warn(
+                    "unknown widths : \n" + (ft["/W"]).__repr__(),
+                    PdfReadWarning,
+                )
+                break
+    if "/Widths" in ft:
+        w = list(ft["/Widths"])  # type: ignore
+        try:
+            st = cast(int, ft["/FirstChar"])
+            en: int = cast(int, ft["/LastChar"])
+            if st > space_code or en < space_code:
+                raise Exception("Not in range")
+            if w[space_code - st] == 0:
+                raise Exception("null width")
+            sp_width = w[space_code - st]
+        except Exception:
+            if "/FontDescriptor" in ft and "/MissingWidth" in cast(
+                DictionaryObject, ft["/FontDescriptor"]
+            ):
+                sp_width = ft["/FontDescriptor"]["/MissingWidth"]  # type: ignore
+            else:
+                # will consider width of char as avg(width)/2
+                m = 0
+                cpt = 0
+                for x in w:
+                    if x > 0:
+                        m += x
+                        cpt += 1
+                sp_width = m / max(1, cpt) / 2
+    return sp_width
diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py
index 1570439ca..031aa1471 100644
--- a/PyPDF2/_page.py
+++ b/PyPDF2/_page.py
@@ -1,1473 +1,1472 @@
-# Copyright (c) 2006, Mathieu Fenniak
-# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-# * The name of the author may not be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-import math
-import uuid
-import warnings
-from decimal import Decimal
-# from math import sqrt
-from typing import (
-    Any,
-    Callable,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Optional,
-    Tuple,
-    Union,
-    cast,
-)
-
-from ._cmap import build_char_map, unknown_char_map
-from ._utils import (
-    CompressedTransformationMatrix,
-    TransformationMatrixType,
-    deprecate_no_replacement,
-    deprecate_with_replacement,
-    matrix_multiply,
-)
-from .constants import PageAttributes as PG
-from .constants import Ressources as RES
-from .errors import PageSizeNotDefinedError, PdfReadWarning
-from .generic import (
-    ArrayObject,
-    ContentStream,
-    DictionaryObject,
-    EncodedStreamObject,
-    FloatObject,
-    IndirectObject,
-    NameObject,
-    NullObject,
-    NumberObject,
-    RectangleObject,
-    TextStringObject,
-    encode_pdfdocencoding,
-)
-
-
-def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject:
-    retval: Union[None, RectangleObject, IndirectObject] = self.get(name)
-    if isinstance(retval, RectangleObject):
-        return retval
-    if retval is None:
-        for d in defaults:
-            retval = self.get(d)
-            if retval is not None:
-                break
-    if isinstance(retval, IndirectObject):
-        retval = self.pdf.get_object(retval)
-    retval = RectangleObject(retval)  # type: ignore
-    _set_rectangle(self, name, retval)
-    return retval
-
-
-def getRectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject:
-    deprecate_no_replacement("getRectangle")
-    return _get_rectangle(self, name, defaults)
-
-
-def _set_rectangle(self: Any, name: str, value: Union[RectangleObject, float]) -> None:
-    if not isinstance(name, NameObject):
-        name = NameObject(name)
-    self[name] = value
-
-
-def setRectangle(self: Any, name: str, value: Union[RectangleObject, float]) -> None:
-    deprecate_no_replacement("setRectangle")
-    _set_rectangle(self, name, value)
-
-
-def _delete_rectangle(self: Any, name: str) -> None:
-    del self[name]
-
-
-def deleteRectangle(self: Any, name: str) -> None:
-    deprecate_no_replacement("deleteRectangle")
-    del self[name]
-
-
-def _create_rectangle_accessor(name: str, fallback: Iterable[str]) -> property:
-    return property(
-        lambda self: _get_rectangle(self, name, fallback),
-        lambda self, value: _set_rectangle(self, name, value),
-        lambda self: _delete_rectangle(self, name),
-    )
-
-
-def createRectangleAccessor(name: str, fallback: Iterable[str]) -> property:
-    deprecate_no_replacement("createRectangleAccessor")
-    return _create_rectangle_accessor(name, fallback)
-
-
-class Transformation:
-    """
-    Specify a 2D transformation.
-
-    The transformation between two coordinate systems is represented by a 3-by-3
-    transformation matrix written as follows::
-
-        a b 0
-        c d 0
-        e f 1
-
-    Because a transformation matrix has only six elements that can be changed,
-    it is usually specified in PDF as the six-element array [ a b c d e f ].
-
-    Coordinate transformations are expressed as matrix multiplications::
-
-                                 a b 0
-     [ x′ y′ 1 ] = [ x y 1 ] ×   c d 0
-                                 e f 1
-
-
-    Usage
-    -----
-
-        >>> from PyPDF2 import Transformation
-        >>> op = Transformation().scale(sx=2, sy=3).translate(tx=10, ty=20)
-        >>> page.add_transformation(op)
-    """
-
-    # 9.5.4 Coordinate Systems for 3D
-    # 4.2.2 Common Transformations
-    def __init__(self, ctm: CompressedTransformationMatrix = (1, 0, 0, 1, 0, 0)):
-        self.ctm = ctm
-
-    @property
-    def matrix(self) -> TransformationMatrixType:
-        return (
-            (self.ctm[0], self.ctm[1], 0),
-            (self.ctm[2], self.ctm[3], 0),
-            (self.ctm[4], self.ctm[5], 1),
-        )
-
-    @staticmethod
-    def compress(matrix: TransformationMatrixType) -> CompressedTransformationMatrix:
-        return (
-            matrix[0][0],
-            matrix[0][1],
-            matrix[1][0],
-            matrix[1][1],
-            matrix[0][2],
-            matrix[1][2],
-        )
-
-    def translate(self, tx: float = 0, ty: float = 0) -> "Transformation":
-        m = self.ctm
-        return Transformation(ctm=(m[0], m[1], m[2], m[3], m[4] + tx, m[5] + ty))
-
-    def scale(
-        self, sx: Optional[float] = None, sy: Optional[float] = None
-    ) -> "Transformation":
-        if sx is None and sy is None:
-            raise ValueError("Either sx or sy must be specified")
-        if sx is None:
-            sx = sy
-        if sy is None:
-            sy = sx
-        assert sx is not None
-        assert sy is not None
-        op: TransformationMatrixType = ((sx, 0, 0), (0, sy, 0), (0, 0, 1))
-        ctm = Transformation.compress(matrix_multiply(self.matrix, op))
-        return Transformation(ctm)
-
-    def rotate(self, rotation: float) -> "Transformation":
-        rotation = math.radians(rotation)
-        op: TransformationMatrixType = (
-            (math.cos(rotation), math.sin(rotation), 0),
-            (-math.sin(rotation), math.cos(rotation), 0),
-            (0, 0, 1),
-        )
-        ctm = Transformation.compress(matrix_multiply(self.matrix, op))
-        return Transformation(ctm)
-
-    def __repr__(self) -> str:
-        return f"Transformation(ctm={self.ctm})"
-
-
-class PageObject(DictionaryObject):
-    """
-    PageObject represents a single page within a PDF file.
-
-    Typically this object will be created by accessing the
-    :meth:`get_page()<PyPDF2.PdfReader.get_page>` method of the
-    :class:`PdfReader<PyPDF2.PdfReader>` class, but it is
-    also possible to create an empty page with the
-    :meth:`create_blank_page()<PyPDF2._page.PageObject.create_blank_page>` static method.
-
-    :param pdf: PDF file the page belongs to.
-    :param indirect_ref: Stores the original indirect reference to
-        this object in its source PDF
-    """
-
-    def __init__(
-        self,
-        pdf: Optional[Any] = None,  # PdfReader
-        indirect_ref: Optional[IndirectObject] = None,
-    ) -> None:
-        from ._reader import PdfReader
-
-        DictionaryObject.__init__(self)
-        self.pdf: Optional[PdfReader] = pdf
-        self.indirect_ref = indirect_ref
-
-    @staticmethod
-    def create_blank_page(
-        pdf: Optional[Any] = None,  # PdfReader
-        width: Union[float, Decimal, None] = None,
-        height: Union[float, Decimal, None] = None,
-    ) -> "PageObject":
-        """
-        Return a new blank page.
-
-        If ``width`` or ``height`` is ``None``, try to get the page size
-        from the last page of *pdf*.
-
-        :param pdf: PDF file the page belongs to
-        :param float width: The width of the new page expressed in default user
-            space units.
-        :param float height: The height of the new page expressed in default user
-            space units.
-        :return: the new blank page:
-        :rtype: :class:`PageObject<PageObject>`
-        :raises PageSizeNotDefinedError: if ``pdf`` is ``None`` or contains
-            no page
-        """
-        page = PageObject(pdf)
-
-        # Creates a new page (cf PDF Reference  7.7.3.3)
-        page.__setitem__(NameObject(PG.TYPE), NameObject("/Page"))
-        page.__setitem__(NameObject(PG.PARENT), NullObject())
-        page.__setitem__(NameObject(PG.RESOURCES), DictionaryObject())
-        if width is None or height is None:
-            if pdf is not None and len(pdf.pages) > 0:
-                lastpage = pdf.pages[len(pdf.pages) - 1]
-                width = lastpage.mediabox.width
-                height = lastpage.mediabox.height
-            else:
-                raise PageSizeNotDefinedError
-        page.__setitem__(
-            NameObject(PG.MEDIABOX), RectangleObject((0, 0, width, height))  # type: ignore
-        )
-
-        return page
-
-    @staticmethod
-    def createBlankPage(
-        pdf: Optional[Any] = None,  # PdfReader
-        width: Union[float, Decimal, None] = None,
-        height: Union[float, Decimal, None] = None,
-    ) -> "PageObject":  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :meth:`create_blank_page` instead.
-        """
-        deprecate_with_replacement("createBlankPage", "create_blank_page")
-        return PageObject.create_blank_page(pdf, width, height)
-
-    def rotate(self, angle: float) -> "PageObject":
-        """
-        Rotate a page clockwise by increments of 90 degrees.
-
-        :param int angle: Angle to rotate the page.  Must be an increment
-            of 90 deg.
-        """
-        if angle % 90 != 0:
-            raise ValueError("Rotation angle must be a multiple of 90")
-        rotate_obj = self.get(PG.ROTATE, 0)
-        current_angle = (
-            rotate_obj if isinstance(rotate_obj, int) else rotate_obj.get_object()
-        )
-        self[NameObject(PG.ROTATE)] = NumberObject(current_angle + angle)
-        return self
-
-    def rotate_clockwise(self, angle: float) -> "PageObject":  # pragma: no cover
-        deprecate_with_replacement("rotate_clockwise", "rotate")
-        return self.rotate(angle)
-
-    def rotateClockwise(self, angle: float) -> "PageObject":  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :meth:`rotate_clockwise` instead.
-        """
-        deprecate_with_replacement("rotateClockwise", "rotate")
-        return self.rotate(angle)
-
-    def rotateCounterClockwise(self, angle: float) -> "PageObject":  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :meth:`rotate_clockwise` with a negative argument instead.
-        """
-        deprecate_with_replacement("rotateCounterClockwise", "rotate")
-        return self.rotate(-angle)
-
-    @staticmethod
-    def _merge_resources(
-        res1: DictionaryObject, res2: DictionaryObject, resource: Any
-    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
-        new_res = DictionaryObject()
-        new_res.update(res1.get(resource, DictionaryObject()).get_object())
-        page2res = cast(
-            DictionaryObject, res2.get(resource, DictionaryObject()).get_object()
-        )
-        rename_res = {}
-        for key in list(page2res.keys()):
-            if key in new_res and new_res.raw_get(key) != page2res.raw_get(key):
-                newname = NameObject(key + str(uuid.uuid4()))
-                rename_res[key] = newname
-                new_res[newname] = page2res[key]
-            elif key not in new_res:
-                new_res[key] = page2res.raw_get(key)
-        return new_res, rename_res
-
-    @staticmethod
-    def _content_stream_rename(
-        stream: ContentStream, rename: Dict[Any, Any], pdf: Any  # PdfReader
-    ) -> ContentStream:
-        if not rename:
-            return stream
-        stream = ContentStream(stream, pdf)
-        for operands, _operator in stream.operations:
-            if isinstance(operands, list):
-                for i in range(len(operands)):
-                    op = operands[i]
-                    if isinstance(op, NameObject):
-                        operands[i] = rename.get(op, op)
-            elif isinstance(operands, dict):
-                for i in operands:
-                    op = operands[i]
-                    if isinstance(op, NameObject):
-                        operands[i] = rename.get(op, op)
-            else:
-                raise KeyError("type of operands is %s" % type(operands))
-        return stream
-
-    @staticmethod
-    def _push_pop_gs(contents: Any, pdf: Any) -> ContentStream:  # PdfReader
-        # adds a graphics state "push" and "pop" to the beginning and end
-        # of a content stream.  This isolates it from changes such as
-        # transformation matricies.
-        stream = ContentStream(contents, pdf)
-        stream.operations.insert(0, ([], "q"))
-        stream.operations.append(([], "Q"))
-        return stream
-
-    @staticmethod
-    def _add_transformation_matrix(
-        contents: Any, pdf: Any, ctm: CompressedTransformationMatrix
-    ) -> ContentStream:  # PdfReader
-        # adds transformation matrix at the beginning of the given
-        # contents stream.
-        a, b, c, d, e, f = ctm
-        contents = ContentStream(contents, pdf)
-        contents.operations.insert(
-            0,
-            [
-                [
-                    FloatObject(a),
-                    FloatObject(b),
-                    FloatObject(c),
-                    FloatObject(d),
-                    FloatObject(e),
-                    FloatObject(f),
-                ],
-                " cm",
-            ],
-        )
-        return contents
-
-    def get_contents(self) -> Optional[ContentStream]:
-        """
-        Access the page contents.
-
-        :return: the ``/Contents`` object, or ``None`` if it doesn't exist.
-            ``/Contents`` is optional, as described in PDF Reference  7.7.3.3
-        """
-        if PG.CONTENTS in self:
-            return self[PG.CONTENTS].get_object()  # type: ignore
-        else:
-            return None
-
-    def getContents(self) -> Optional[ContentStream]:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :meth:`get_contents` instead.
-        """
-        deprecate_with_replacement("getContents", "get_contents")
-        return self.get_contents()
-
-    def merge_page(self, page2: "PageObject", expand: bool = False) -> None:
-        """
-        Merge the content streams of two pages into one.
-
-        Resource references
-        (i.e. fonts) are maintained from both pages.  The mediabox/cropbox/etc
-        of this page are not altered.  The parameter page's content stream will
-        be added to the end of this page's content stream, meaning that it will
-        be drawn after, or "on top" of this page.
-
-        :param PageObject page2: The page to be merged into this one. Should be
-            an instance of :class:`PageObject<PageObject>`.
-        :param bool expand: If true, the current page dimensions will be
-            expanded to accommodate the dimensions of the page to be merged.
-        """
-        self._merge_page(page2, expand=expand)
-
-    def mergePage(self, page2: "PageObject") -> None:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :meth:`merge_page` instead.
-        """
-        deprecate_with_replacement("mergePage", "merge_page")
-        return self.merge_page(page2)
-
-    def _merge_page(
-        self,
-        page2: "PageObject",
-        page2transformation: Optional[Callable[[Any], ContentStream]] = None,
-        ctm: Optional[CompressedTransformationMatrix] = None,
-        expand: bool = False,
-    ) -> None:
-        # First we work on merging the resource dictionaries.  This allows us
-        # to find out what symbols in the content streams we might need to
-        # rename.
-
-        new_resources = DictionaryObject()
-        rename = {}
-        original_resources = cast(DictionaryObject, self[PG.RESOURCES].get_object())
-        page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object())
-        new_annots = ArrayObject()
-
-        for page in (self, page2):
-            if PG.ANNOTS in page:
-                annots = page[PG.ANNOTS]
-                if isinstance(annots, ArrayObject):
-                    for ref in annots:
-                        new_annots.append(ref)
-
-        for res in (
-            RES.EXT_G_STATE,
-            RES.FONT,
-            RES.XOBJECT,
-            RES.COLOR_SPACE,
-            RES.PATTERN,
-            RES.SHADING,
-            RES.PROPERTIES,
-        ):
-            new, newrename = PageObject._merge_resources(
-                original_resources, page2resources, res
-            )
-            if new:
-                new_resources[NameObject(res)] = new
-                rename.update(newrename)
-
-        # Combine /ProcSet sets.
-        new_resources[NameObject(RES.PROC_SET)] = ArrayObject(
-            frozenset(
-                original_resources.get(RES.PROC_SET, ArrayObject()).get_object()  # type: ignore
-            ).union(
-                frozenset(page2resources.get(RES.PROC_SET, ArrayObject()).get_object())  # type: ignore
-            )
-        )
-
-        new_content_array = ArrayObject()
-
-        original_content = self.get_contents()
-        if original_content is not None:
-            new_content_array.append(
-                PageObject._push_pop_gs(original_content, self.pdf)
-            )
-
-        page2content = page2.get_contents()
-        if page2content is not None:
-            page2content = ContentStream(page2content, self.pdf)
-            page2content.operations.insert(
-                0,
-                (
-                    map(
-                        FloatObject,
-                        [
-                            page2.trimbox.left,
-                            page2.trimbox.bottom,
-                            page2.trimbox.width,
-                            page2.trimbox.height,
-                        ],
-                    ),
-                    "re",
-                ),
-            )
-            page2content.operations.insert(1, ([], "W"))
-            page2content.operations.insert(2, ([], "n"))
-            if page2transformation is not None:
-                page2content = page2transformation(page2content)
-            page2content = PageObject._content_stream_rename(
-                page2content, rename, self.pdf
-            )
-            page2content = PageObject._push_pop_gs(page2content, self.pdf)
-            new_content_array.append(page2content)
-
-        # if expanding the page to fit a new page, calculate the new media box size
-        if expand:
-            self._expand_mediabox(page2, ctm)
-
-        self[NameObject(PG.CONTENTS)] = ContentStream(new_content_array, self.pdf)
-        self[NameObject(PG.RESOURCES)] = new_resources
-        self[NameObject(PG.ANNOTS)] = new_annots
-
-    def _expand_mediabox(
-        self, page2: "PageObject", ctm: Optional[CompressedTransformationMatrix]
-    ) -> None:
-        corners1 = (
-            self.mediabox.left.as_numeric(),
-            self.mediabox.bottom.as_numeric(),
-            self.mediabox.right.as_numeric(),
-            self.mediabox.top.as_numeric(),
-        )
-        corners2 = (
-            page2.mediabox.left.as_numeric(),
-            page2.mediabox.bottom.as_numeric(),
-            page2.mediabox.left.as_numeric(),
-            page2.mediabox.top.as_numeric(),
-            page2.mediabox.right.as_numeric(),
-            page2.mediabox.top.as_numeric(),
-            page2.mediabox.right.as_numeric(),
-            page2.mediabox.bottom.as_numeric(),
-        )
-        if ctm is not None:
-            ctm = tuple(float(x) for x in ctm)  # type: ignore[assignment]
-            new_x = tuple(
-                ctm[0] * corners2[i] + ctm[2] * corners2[i + 1] + ctm[4]
-                for i in range(0, 8, 2)
-            )
-            new_y = tuple(
-                ctm[1] * corners2[i] + ctm[3] * corners2[i + 1] + ctm[5]
-                for i in range(0, 8, 2)
-            )
-        else:
-            new_x = corners2[0:8:2]
-            new_y = corners2[1:8:2]
-        lowerleft = (min(new_x), min(new_y))
-        upperright = (max(new_x), max(new_y))
-        lowerleft = (min(corners1[0], lowerleft[0]), min(corners1[1], lowerleft[1]))
-        upperright = (
-            max(corners1[2], upperright[0]),
-            max(corners1[3], upperright[1]),
-        )
-
-        self.mediabox.lower_left = lowerleft
-        self.mediabox.upper_right = upperright
-
-    def mergeTransformedPage(
-        self,
-        page2: "PageObject",
-        ctm: Union[CompressedTransformationMatrix, Transformation],
-        expand: bool = False,
-    ) -> None:  # pragma: no cover
-        """
-        mergeTransformedPage is similar to merge_page, but a transformation
-        matrix is applied to the merged stream.
-
-        :param PageObject page2: The page to be merged into this one. Should be
-            an instance of :class:`PageObject<PageObject>`.
-        :param tuple ctm: a 6-element tuple containing the operands of the
-            transformation matrix
-        :param bool expand: Whether the page should be expanded to fit the dimensions
-            of the page to be merged.
-
-        .. deprecated:: 1.28.0
-
-            Use :meth:`add_transformation`  and :meth:`merge_page` instead.
-        """
-        deprecate_with_replacement(
-            "page.mergeTransformedPage(page2, ctm)",
-            "page2.add_transformation(ctm); page.merge_page(page2)",
-        )
-        if isinstance(ctm, Transformation):
-            ctm = ctm.ctm
-        ctm = cast(CompressedTransformationMatrix, ctm)
-        self._merge_page(
-            page2,
-            lambda page2Content: PageObject._add_transformation_matrix(
-                page2Content, page2.pdf, ctm  # type: ignore[arg-type]
-            ),
-            ctm,
-            expand,
-        )
-
-    def mergeScaledPage(
-        self, page2: "PageObject", scale: float, expand: bool = False
-    ) -> None:  # pragma: no cover
-        """
-        mergeScaledPage is similar to merge_page, but the stream to be merged
-        is scaled by appling a transformation matrix.
-
-        :param PageObject page2: The page to be merged into this one. Should be
-            an instance of :class:`PageObject<PageObject>`.
-        :param float scale: The scaling factor
-        :param bool expand: Whether the page should be expanded to fit the
-            dimensions of the page to be merged.
-
-        .. deprecated:: 1.28.0
-
-            Use :meth:`add_transformation` and :meth:`merge_page` instead.
-        """
-        deprecate_with_replacement(
-            "page.mergeScaledPage(page2, scale, expand)",
-            "page2.add_transformation(Transformation().scale(scale)); page.merge_page(page2, expand)",
-        )
-        op = Transformation().scale(scale, scale)
-        self.mergeTransformedPage(page2, op, expand)
-
-    def mergeRotatedPage(
-        self, page2: "PageObject", rotation: float, expand: bool = False
-    ) -> None:  # pragma: no cover
-        """
-        mergeRotatedPage is similar to merge_page, but the stream to be merged
-        is rotated by appling a transformation matrix.
-
-        :param PageObject page2: the page to be merged into this one. Should be
-            an instance of :class:`PageObject<PageObject>`.
-        :param float rotation: The angle of the rotation, in degrees
-        :param bool expand: Whether the page should be expanded to fit the
-            dimensions of the page to be merged.
-
-        .. deprecated:: 1.28.0
-
-            Use :meth:`add_transformation` and :meth:`merge_page` instead.
-        """
-        deprecate_with_replacement(
-            "page.mergeRotatedPage(page2, rotation, expand)",
-            "page2.add_transformation(Transformation().rotate(rotation)); page.merge_page(page2, expand)",
-        )
-        op = Transformation().rotate(rotation)
-        self.mergeTransformedPage(page2, op, expand)
-
-    def mergeTranslatedPage(
-        self, page2: "PageObject", tx: float, ty: float, expand: bool = False
-    ) -> None:  # pragma: no cover
-        """
-        mergeTranslatedPage is similar to merge_page, but the stream to be
-        merged is translated by appling a transformation matrix.
-
-        :param PageObject page2: the page to be merged into this one. Should be
-            an instance of :class:`PageObject<PageObject>`.
-        :param float tx: The translation on X axis
-        :param float ty: The translation on Y axis
-        :param bool expand: Whether the page should be expanded to fit the
-            dimensions of the page to be merged.
-
-        .. deprecated:: 1.28.0
-
-            Use :meth:`add_transformation` and :meth:`merge_page` instead.
-        """
-        deprecate_with_replacement(
-            "page.mergeTranslatedPage(page2, tx, ty, expand)",
-            "page2.add_transformation(Transformation().translate(tx, ty)); page.merge_page(page2, expand)",
-        )
-        op = Transformation().translate(tx, ty)
-        self.mergeTransformedPage(page2, op, expand)
-
-    def mergeRotatedTranslatedPage(
-        self,
-        page2: "PageObject",
-        rotation: float,
-        tx: float,
-        ty: float,
-        expand: bool = False,
-    ) -> None:  # pragma: no cover
-        """
-        mergeRotatedTranslatedPage is similar to merge_page, but the stream to
-        be merged is rotated and translated by appling a transformation matrix.
-
-        :param PageObject page2: the page to be merged into this one. Should be
-            an instance of :class:`PageObject<PageObject>`.
-        :param float tx: The translation on X axis
-        :param float ty: The translation on Y axis
-        :param float rotation: The angle of the rotation, in degrees
-        :param bool expand: Whether the page should be expanded to fit the
-            dimensions of the page to be merged.
-
-        .. deprecated:: 1.28.0
-
-            Use :meth:`add_transformation` and :meth:`merge_page` instead.
-        """
-        deprecate_with_replacement(
-            "page.mergeRotatedTranslatedPage(page2, rotation, tx, ty, expand)",
-            "page2.add_transformation(Transformation().rotate(rotation).translate(tx, ty)); page.merge_page(page2, expand)",
-        )
-        op = Transformation().translate(-tx, -ty).rotate(rotation).translate(tx, ty)
-        return self.mergeTransformedPage(page2, op, expand)
-
-    def mergeRotatedScaledPage(
-        self, page2: "PageObject", rotation: float, scale: float, expand: bool = False
-    ) -> None:  # pragma: no cover
-        """
-        mergeRotatedScaledPage is similar to merge_page, but the stream to be
-        merged is rotated and scaled by appling a transformation matrix.
-
-        :param PageObject page2: the page to be merged into this one. Should be
-            an instance of :class:`PageObject<PageObject>`.
-        :param float rotation: The angle of the rotation, in degrees
-        :param float scale: The scaling factor
-        :param bool expand: Whether the page should be expanded to fit the
-            dimensions of the page to be merged.
-
-        .. deprecated:: 1.28.0
-
-            Use :meth:`add_transformation` and :meth:`merge_page` instead.
-        """
-        deprecate_with_replacement(
-            "page.mergeRotatedScaledPage(page2, rotation, scale, expand)",
-            "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); page.merge_page(page2, expand)",
-        )
-        op = Transformation().rotate(rotation).scale(scale, scale)
-        self.mergeTransformedPage(page2, op, expand)
-
-    def mergeScaledTranslatedPage(
-        self,
-        page2: "PageObject",
-        scale: float,
-        tx: float,
-        ty: float,
-        expand: bool = False,
-    ) -> None:  # pragma: no cover
-        """
-        mergeScaledTranslatedPage is similar to merge_page, but the stream to be
-        merged is translated and scaled by appling a transformation matrix.
-
-        :param PageObject page2: the page to be merged into this one. Should be
-            an instance of :class:`PageObject<PageObject>`.
-        :param float scale: The scaling factor
-        :param float tx: The translation on X axis
-        :param float ty: The translation on Y axis
-        :param bool expand: Whether the page should be expanded to fit the
-            dimensions of the page to be merged.
-
-        .. deprecated:: 1.28.0
-
-            Use :meth:`add_transformation` and :meth:`merge_page` instead.
-        """
-        deprecate_with_replacement(
-            "page.mergeScaledTranslatedPage(page2, scale, tx, ty, expand)",
-            "page2.add_transformation(Transformation().scale(scale).translate(tx, ty)); page.merge_page(page2, expand)",
-        )
-        op = Transformation().scale(scale, scale).translate(tx, ty)
-        return self.mergeTransformedPage(page2, op, expand)
-
-    def mergeRotatedScaledTranslatedPage(
-        self,
-        page2: "PageObject",
-        rotation: float,
-        scale: float,
-        tx: float,
-        ty: float,
-        expand: bool = False,
-    ) -> None:  # pragma: no cover
-        """
-        mergeRotatedScaledTranslatedPage is similar to merge_page, but the
-        stream to be merged is translated, rotated and scaled by appling a
-        transformation matrix.
-
-        :param PageObject page2: the page to be merged into this one. Should be
-            an instance of :class:`PageObject<PageObject>`.
-        :param float tx: The translation on X axis
-        :param float ty: The translation on Y axis
-        :param float rotation: The angle of the rotation, in degrees
-        :param float scale: The scaling factor
-        :param bool expand: Whether the page should be expanded to fit the
-            dimensions of the page to be merged.
-
-        .. deprecated:: 1.28.0
-
-            Use :meth:`add_transformation` and :meth:`merge_page` instead.
-        """
-        deprecate_with_replacement(
-            "page.mergeRotatedScaledTranslatedPage(page2, rotation, tx, ty, expand)",
-            "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); page.merge_page(page2, expand)",
-        )
-        op = Transformation().rotate(rotation).scale(scale, scale).translate(tx, ty)
-        self.mergeTransformedPage(page2, op, expand)
-
-    def add_transformation(
-        self,
-        ctm: Union[Transformation, CompressedTransformationMatrix],
-        expand: bool = False,
-    ) -> None:
-        """
-        Apply a transformation matrix to the page.
-
-        :param tuple ctm: A 6-element tuple containing the operands of the
-            transformation matrix. Alternatively, a
-            :py:class:`Transformation<PyPDF2.Transformation>`
-            object can be passed.
-
-        See :doc:`/user/cropping-and-transforming`.
-        """
-        if isinstance(ctm, Transformation):
-            ctm = ctm.ctm
-        content = self.get_contents()
-        if content is not None:
-            content = PageObject._add_transformation_matrix(content, self.pdf, ctm)
-            content = PageObject._push_pop_gs(content, self.pdf)
-            self[NameObject(PG.CONTENTS)] = content
-        # if expanding the page to fit a new page, calculate the new media box size
-        if expand:
-            corners = [
-                self.mediabox.left.as_numeric(),
-                self.mediabox.bottom.as_numeric(),
-                self.mediabox.left.as_numeric(),
-                self.mediabox.top.as_numeric(),
-                self.mediabox.right.as_numeric(),
-                self.mediabox.top.as_numeric(),
-                self.mediabox.right.as_numeric(),
-                self.mediabox.bottom.as_numeric(),
-            ]
-
-            ctm = tuple(float(x) for x in ctm)  # type: ignore[assignment]
-            new_x = [
-                ctm[0] * corners[i] + ctm[2] * corners[i + 1] + ctm[4]
-                for i in range(0, 8, 2)
-            ]
-            new_y = [
-                ctm[1] * corners[i] + ctm[3] * corners[i + 1] + ctm[5]
-                for i in range(0, 8, 2)
-            ]
-
-            lowerleft = (min(new_x), min(new_y))
-            upperright = (max(new_x), max(new_y))
-            lowerleft = (min(corners[0], lowerleft[0]), min(corners[1], lowerleft[1]))
-            upperright = (
-                max(corners[2], upperright[0]),
-                max(corners[3], upperright[1]),
-            )
-
-            self.mediabox.lower_left = lowerleft
-            self.mediabox.upper_right = upperright
-
-    def addTransformation(
-        self, ctm: CompressedTransformationMatrix
-    ) -> None:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :meth:`add_transformation` instead.
-        """
-        deprecate_with_replacement("addTransformation", "add_transformation")
-        self.add_transformation(ctm)
-
-    def scale(self, sx: float, sy: float) -> None:
-        """
-        Scale a page by the given factors by appling a transformation
-        matrix to its content and updating the page size.
-
-        :param float sx: The scaling factor on horizontal axis.
-        :param float sy: The scaling factor on vertical axis.
-        """
-        self.add_transformation((sx, 0, 0, sy, 0, 0))
-        self.mediabox = RectangleObject(
-            (
-                float(self.mediabox.left) * sx,
-                float(self.mediabox.bottom) * sy,
-                float(self.mediabox.right) * sx,
-                float(self.mediabox.top) * sy,
-            )
-        )
-        if PG.VP in self:
-            viewport = self[PG.VP]
-            if isinstance(viewport, ArrayObject):
-                bbox = viewport[0]["/BBox"]
-            else:
-                bbox = viewport["/BBox"]  # type: ignore
-            scaled_bbox = RectangleObject(
-                (
-                    float(bbox[0]) * sx,
-                    float(bbox[1]) * sy,
-                    float(bbox[2]) * sx,
-                    float(bbox[3]) * sy,
-                )
-            )
-            if isinstance(viewport, ArrayObject):
-                self[NameObject(PG.VP)][NumberObject(0)][  # type: ignore
-                    NameObject("/BBox")
-                ] = scaled_bbox
-            else:
-                self[NameObject(PG.VP)][NameObject("/BBox")] = scaled_bbox  # type: ignore
-
-    def scale_by(self, factor: float) -> None:
-        """
-        Scale a page by the given factor by appling a transformation
-        matrix to its content and updating the page size.
-
-        :param float factor: The scaling factor (for both X and Y axis).
-        """
-        self.scale(factor, factor)
-
-    def scaleBy(self, factor: float) -> None:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :meth:`scale_by` instead.
-        """
-        deprecate_with_replacement("scaleBy", "scale_by")
-        self.scale(factor, factor)
-
-    def scale_to(self, width: float, height: float) -> None:
-        """
-        Scale a page to the specified dimentions by appling a
-        transformation matrix to its content and updating the page size.
-
-        :param float width: The new width.
-        :param float height: The new heigth.
-        """
-        sx = width / float(self.mediabox.width)
-        sy = height / float(self.mediabox.height)
-        self.scale(sx, sy)
-
-    def scaleTo(self, width: float, height: float) -> None:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :meth:`scale_to` instead.
-        """
-        deprecate_with_replacement("scaleTo", "scale_to")
-        self.scale_to(width, height)
-
-    def compress_content_streams(self) -> None:
-        """
-        Compress the size of this page by joining all content streams and
-        applying a FlateDecode filter.
-
-        However, it is possible that this function will perform no action if
-        content stream compression becomes "automatic" for some reason.
-        """
-        content = self.get_contents()
-        if content is not None:
-            if not isinstance(content, ContentStream):
-                content = ContentStream(content, self.pdf)
-            self[NameObject(PG.CONTENTS)] = content.flate_encode()
-
-    def compressContentStreams(self) -> None:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :meth:`compress_content_streams` instead.
-        """
-        deprecate_with_replacement("compressContentStreams", "compress_content_streams")
-        self.compress_content_streams()
-
-    def _extract_text_old(
-        self, Tj_sep: str = "", TJ_sep: str = ""
-    ) -> str:  # pragma: no cover
-        """
-        Locate all text drawing commands, in the order they are provided in the
-        content stream, and extract the text.  This works well for some PDF
-        files, but poorly for others, depending on the generator used.  This will
-        be refined in the future.  Do not rely on the order of text coming out of
-        this function, as it will change if this function is made more
-        sophisticated.
-
-        :return: a string object.
-        """
-        text = ""
-        content = self[PG.CONTENTS].get_object()
-        if not isinstance(content, ContentStream):
-            content = ContentStream(content, self.pdf)
-        # Note: we check all strings are TextStringObjects.  ByteStringObjects
-        # are strings where the byte->string encoding was unknown, so adding
-        # them to the text here would be gibberish.
-
-        space_scale = 1.0
-
-        for operands, operator in content.operations:
-            # Missing operators:
-            #   Tf: text font
-            #  Tfs: text font size
-            #   Tc: '5.2.1 Character Spacing'
-            #   Th: '5.2.3 Horizontal Scaling'
-            #   Tl: '5.2.4 Leading'
-            # Tmode: '5.2.5 Text Rendering Mode'
-            # Trise: '5.2.6 Text Rise'
-
-            if operator in [b"Tf", b"Tfs", b"Tc", b"Th", b"Tl", b"Tmode"]:
-                pass
-            elif operator == b"Tw":  # word spacing
-                # See '5.2.2 Word Spacing'
-                space_scale = 1.0 + float(operands[0])
-            elif operator == b"Tj":
-                # See 'TABLE 5.6 Text-showing operators'
-                _text = operands[0]
-                if isinstance(_text, TextStringObject):
-                    text += Tj_sep
-                    text += _text
-                    text += "\n"
-            elif operator == b"T*":
-                # See 'TABLE 5.5 Text-positioning operators'
-                text += "\n"
-            elif operator == b"'":
-                # See 'TABLE 5.6 Text-showing operators'
-                text += "\n"
-                _text = operands[0]
-                if isinstance(_text, TextStringObject):
-                    text += operands[0]
-            elif operator == b'"':
-                # See 'TABLE 5.6 Text-showing operators'
-                _text = operands[2]
-                if isinstance(_text, TextStringObject):
-                    text += "\n"
-                    text += _text
-            elif operator == b"TJ":
-                # See 'TABLE 5.6 Text-showing operators'
-                for i in operands[0]:
-                    if isinstance(i, TextStringObject):
-                        text += TJ_sep
-                        text += i
-                    elif isinstance(i, (NumberObject, FloatObject)):
-                        # a positive value decreases and the negative value increases
-                        # space
-                        if int(i) < -space_scale * 250:
-                            if len(text) == 0 or text[-1] != " ":
-                                text += " "
-                        else:
-                            if len(text) > 1 and text[-1] == " ":
-                                text = text[:-1]
-                text += "\n"
-        return text
-
-    def _debug_for_extract(self) -> str:
-        out = ""
-        for ope, op in ContentStream(
-            self["/Contents"].getObject(), self.pdf, "bytes"
-        ).operations:
-            if op == b"TJ":
-                s = [x for x in ope[0] if isinstance(x, str)]
-            else:
-                s = []
-            out += op.decode("utf-8") + " " + "".join(s) + ope.__repr__() + "\n"
-        out += "\n=============================\n"
-        try:
-            for fo in self["/Resources"]["/Font"]:  # type:ignore
-                out += fo + "\n"
-                out += self["/Resources"]["/Font"][fo].__repr__() + "\n"  # type:ignore
-                try:
-                    enc_repr = self["/Resources"]["/Font"][fo][  # type:ignore
-                        "/Encoding"
-                    ].__repr__()
-                    out += enc_repr + "\n"
-                except Exception:
-                    pass
-        except KeyError:
-            out += "No Font\n"
-        return out
-
-    def _extract_text(
-        self,
-        obj: Any,
-        pdf: Any,
-        space_width: float = 200.0,
-        content_key: Optional[str] = PG.CONTENTS,
-    ) -> str:
-        """
-        Locate all text drawing commands, in the order they are provided in the
-        content stream, and extract the text.  This works well for some PDF
-        files, but poorly for others, depending on the generator used.  This will
-        be refined in the future.  Do not rely on the order of text coming out of
-        this function, as it will change if this function is made more
-        sophisticated.
-
-        :param float space_width: force default space width
-                    (if not extracted from font (default 200)
-        :param Optional[str] content_key: indicate the default key where to extract data
-            None = the opbject; this allow to reuse the function on XObject
-            default = "/Content"
-        :return: a string object.
-        """
-
-        text: str = ""
-        output: str = ""
-        cmaps: Dict[
-            str, Tuple[str, float, Union[str, Dict[int, str]], Dict[str, str]]
-        ] = {}
-        resources_dict = cast(DictionaryObject, obj["/Resources"])
-        if "/Font" in resources_dict:
-            for f in cast(DictionaryObject, resources_dict["/Font"]):
-                cmaps[f] = build_char_map(f, space_width, obj)
-        cmap: Tuple[
-            Union[str, Dict[int, str]], Dict[str, str], str
-        ]  # (encoding,CMAP,font_name)
-        try:
-            content = (
-                obj[content_key].get_object() if isinstance(content_key, str) else obj
-            )
-            if not isinstance(content, ContentStream):
-                content = ContentStream(content, pdf, "bytes")
-        except KeyError:  # it means no content can be extracted(certainly empty page)
-            return ""
-        # Note: we check all strings are TextStringObjects.  ByteStringObjects
-        # are strings where the byte->string encoding was unknown, so adding
-        # them to the text here would be gibberish.
-
-        tm_matrix: List[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
-        tm_prev: List[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
-        char_scale = 1.0
-        space_scale = 1.0
-        _space_width: float = 500.0  # will be set correctly at first Tf
-        TL = 0.0
-        font_size = 12.0  # init just in case of
-
-        # tm_matrix: Tuple = tm_matrix, output: str = output, text: str = text,
-        # char_scale: float = char_scale,space_scale : float = space_scale, _space_width: float = _space_width,
-        # TL: float = TL, font_size: float = font_size, cmap = cmap
-
-        def process_operation(operator: bytes, operands: List) -> None:
-            nonlocal tm_matrix, tm_prev, output, text, char_scale, space_scale, _space_width, TL, font_size, cmap
-            if tm_matrix[4] != 0 and tm_matrix[5] != 0:  # o reuse of the
-                tm_prev = list(tm_matrix)
-            # Table 5.4 page 405
-            if operator == b"BT":
-                tm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
-                # tm_prev = tm_matrix
-                output += text
-                # based
-                # if output != "" and output[-1]!="\n":
-                #    output += "\n"
-                text = ""
-                return None
-            elif operator == b"ET":
-                output += text
-                text = ""
-            # Table 5.2 page 398
-            elif operator == b"Tz":
-                char_scale = float(operands[0]) / 100.0
-            elif operator == b"Tw":
-                space_scale = 1.0 + float(operands[0])
-            elif operator == b"TL":
-                TL = float(operands[0])
-            elif operator == b"Tf":
-                if text != "":
-                    output += text  # .translate(cmap)
-                text = ""
-                try:
-                    _space_width = cmaps[operands[0]][1]
-                    cmap = (
-                        cmaps[operands[0]][2],
-                        cmaps[operands[0]][3],
-                        operands[0],
-                    )  # type:ignore
-                except KeyError:  # font not found
-                    _space_width = unknown_char_map[1]
-                    cmap = (
-                        unknown_char_map[2],
-                        unknown_char_map[3],
-                        "???" + operands[0],
-                    )
-                try:
-                    font_size = float(operands[1])
-                except Exception:
-                    pass  # keep previous size
-            # Table 5.5 page 406
-            elif operator == b"Td":
-                tm_matrix[5] += float(operands[1])
-                tm_matrix[4] += float(operands[0])
-            elif operator == b"Tm":
-                tm_matrix = [
-                    float(operands[0]),
-                    float(operands[1]),
-                    float(operands[2]),
-                    float(operands[3]),
-                    float(operands[4]),
-                    float(operands[5]),
-                ]
-            elif operator == b"T*":
-                tm_matrix[5] -= TL
-            elif operator == b"Tj":
-                t: str = ""
-                tt: bytes = (
-                    encode_pdfdocencoding(operands[0])
-                    if isinstance(operands[0], str)
-                    else operands[0]
-                )
-                if isinstance(cmap[0], str):
-                    t = tt.decode(cmap[0])  # apply str encoding
-                else:  # apply dict encoding
-                    t = "".join(
-                        [
-                            cmap[0][x] if x in cmap[0] else bytes((x,)).decode()
-                            for x in tt
-                        ]
-                    )
-
-                text += "".join([cmap[1][x] if x in cmap[1] else x for x in t])
-            else:
-                return None
-            # process text changes due to positionchange: " "
-            if tm_matrix[5] <= (
-                tm_prev[5]
-                - font_size  # remove scaling * sqrt(tm_matrix[2] ** 2 + tm_matrix[3] ** 2)
-            ):  # it means that we are moving down by one line
-                output += text + "\n"  # .translate(cmap) + "\n"
-                text = ""
-            elif tm_matrix[4] >= (
-                tm_prev[4] + space_scale * _space_width * char_scale
-            ):  # it means that we are moving down by one line
-                text += " "
-            return None
-            # for clarity Operator in (b"g",b"G") : nothing to do
-            # end of process_operation ######
-
-        for operands, operator in content.operations:
-            # multiple operators are defined in here ####
-            if operator == b"'":
-                process_operation(b"T*", [])
-                process_operation(b"Tj", operands)
-            elif operator == b'"':
-                process_operation(b"T*", [])
-                process_operation(b"TJ", operands)
-            elif operator == b"TD":
-                process_operation(b"TL", [-operands[1]])
-                process_operation(b"Td", operands)
-            elif operator == b"TJ":
-                for op in operands[0]:
-                    if isinstance(op, (str, bytes)):
-                        process_operation(b"Tj", [op])
-                    if isinstance(op, (int, float, NumberObject, FloatObject)):
-                        process_operation(b"Td", [-op, 0.0])
-            elif operator == b"Do":
-                output += text
-                if output != "":
-                    output += "\n"
-                try:
-                    xobj = resources_dict["/XObject"]  # type: ignore
-                    if xobj[operands[0]]["/Subtype"] != "/Image":   # type: ignore
-                        output += text
-                        text = self.extract_xform_text(xobj[operands[0]], space_width)  # type: ignore
-                        output += text
-                except Exception:
-                    warnings.warn(
-                        f" impossible to decode XFormObject {operands[0]}",
-                        PdfReadWarning,
-                    )
-                finally:
-                    text = ""
-            else:
-                process_operation(operator, operands)
-        output += text  # just in case of
-        return output
-
-    def extract_text(
-        self, Tj_sep: str = "", TJ_sep: str = "", space_width: float = 200.0
-    ) -> str:
-        """
-        Locate all text drawing commands, in the order they are provided in the
-        content stream, and extract the text.  This works well for some PDF
-        files, but poorly for others, depending on the generator used.  This will
-        be refined in the future.  Do not rely on the order of text coming out of
-        this function, as it will change if this function is made more
-        sophisticated.
-        space_width : float = force default space width (if not extracted from font (default 200)
-
-        :return: a string object.
-        """
-        return self._extract_text(self, self.pdf, space_width, PG.CONTENTS)
-
-    def extract_xform_text(
-        self, xform: EncodedStreamObject, space_width: float = 200.0
-    ) -> str:
-        """
-        Extraction tet from an XObject.
-        space_width : float = force default space width (if not extracted from font (default 200)
-
-        :return: a string object.
-        """
-        return self._extract_text(xform, self.pdf, space_width, None)
-
-    def extractText(
-        self, Tj_sep: str = "", TJ_sep: str = ""
-    ) -> str:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :meth:`extract_text` instead.
-        """
-        deprecate_with_replacement("extractText", "extract_text")
-        return self.extract_text(Tj_sep=Tj_sep, TJ_sep=TJ_sep)
-
-    mediabox = _create_rectangle_accessor(PG.MEDIABOX, ())
-    """
-    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
-    defining the boundaries of the physical medium on which the page is
-    intended to be displayed or printed.
-    """
-
-    @property
-    def mediaBox(self) -> RectangleObject:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :py:attr:`mediabox` instead.
-        """
-        deprecate_with_replacement("mediaBox", "mediabox")
-        return self.mediabox
-
-    @mediaBox.setter
-    def mediaBox(self, value: RectangleObject) -> None:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :py:attr:`mediabox` instead.
-        """
-        deprecate_with_replacement("mediaBox", "mediabox")
-        self.mediabox = value
-
-    cropbox = _create_rectangle_accessor("/CropBox", (PG.MEDIABOX,))
-    """
-    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
-    defining the visible region of default user space.  When the page is
-    displayed or printed, its contents are to be clipped (cropped) to this
-    rectangle and then imposed on the output medium in some
-    implementation-defined manner.  Default value: same as :attr:`mediabox<mediabox>`.
-    """
-
-    @property
-    def cropBox(self) -> RectangleObject:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :py:attr:`cropbox` instead.
-        """
-        deprecate_with_replacement("cropBox", "cropbox")
-        return self.cropbox
-
-    @cropBox.setter
-    def cropBox(self, value: RectangleObject) -> None:  # pragma: no cover
-        deprecate_with_replacement("cropBox", "cropbox")
-        self.cropbox = value
-
-    bleedbox = _create_rectangle_accessor("/BleedBox", ("/CropBox", PG.MEDIABOX))
-    """
-    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
-    defining the region to which the contents of the page should be clipped
-    when output in a production enviroment.
-    """
-
-    @property
-    def bleedBox(self) -> RectangleObject:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :py:attr:`bleedbox` instead.
-        """
-        deprecate_with_replacement("bleedBox", "bleedbox")
-        return self.bleedbox
-
-    @bleedBox.setter
-    def bleedBox(self, value: RectangleObject) -> None:  # pragma: no cover
-        deprecate_with_replacement("bleedBox", "bleedbox")
-        self.bleedbox = value
-
-    trimbox = _create_rectangle_accessor("/TrimBox", ("/CropBox", PG.MEDIABOX))
-    """
-    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
-    defining the intended dimensions of the finished page after trimming.
-    """
-
-    @property
-    def trimBox(self) -> RectangleObject:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :py:attr:`trimbox` instead.
-        """
-        deprecate_with_replacement("trimBox", "trimbox")
-        return self.trimbox
-
-    @trimBox.setter
-    def trimBox(self, value: RectangleObject) -> None:  # pragma: no cover
-        deprecate_with_replacement("trimBox", "trimbox")
-        self.trimbox = value
-
-    artbox = _create_rectangle_accessor("/ArtBox", ("/CropBox", PG.MEDIABOX))
-    """
-    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
-    defining the extent of the page's meaningful content as intended by the
-    page's creator.
-    """
-
-    @property
-    def artBox(self) -> RectangleObject:  # pragma: no cover
-        """
-        .. deprecated:: 1.28.0
-
-            Use :py:attr:`artbox` instead.
-        """
-        deprecate_with_replacement("artBox", "artbox")
-        return self.artbox
-
-    @artBox.setter
-    def artBox(self, value: RectangleObject) -> None:  # pragma: no cover
-        deprecate_with_replacement("artBox", "artbox")
-        self.artbox = value
-
-
-class _VirtualList:
-    def __init__(
-        self,
-        length_function: Callable[[], int],
-        get_function: Callable[[int], PageObject],
-    ) -> None:
-        self.length_function = length_function
-        self.get_function = get_function
-        self.current = -1
-
-    def __len__(self) -> int:
-        return self.length_function()
-
-    def __getitem__(self, index: int) -> PageObject:
-        if isinstance(index, slice):
-            indices = range(*index.indices(len(self)))
-            cls = type(self)
-            return cls(indices.__len__, lambda idx: self[indices[idx]])
-        if not isinstance(index, int):
-            raise TypeError("sequence indices must be integers")
-        len_self = len(self)
-        if index < 0:
-            # support negative indexes
-            index = len_self + index
-        if index < 0 or index >= len_self:
-            raise IndexError("sequence index out of range")
-        return self.get_function(index)
-
-    def __iter__(self) -> Iterator[PageObject]:
-        for i in range(len(self)):
-            yield self[i]
+# Copyright (c) 2006, Mathieu Fenniak
+# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+import math
+import uuid
+import warnings
+from decimal import Decimal
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
+
+from ._cmap import build_char_map, unknown_char_map
+from ._utils import (
+    CompressedTransformationMatrix,
+    TransformationMatrixType,
+    deprecate_no_replacement,
+    deprecate_with_replacement,
+    matrix_multiply,
+)
+from .constants import PageAttributes as PG
+from .constants import Ressources as RES
+from .errors import PageSizeNotDefinedError, PdfReadWarning
+from .generic import (
+    ArrayObject,
+    ContentStream,
+    DictionaryObject,
+    EncodedStreamObject,
+    FloatObject,
+    IndirectObject,
+    NameObject,
+    NullObject,
+    NumberObject,
+    RectangleObject,
+    TextStringObject,
+    encode_pdfdocencoding,
+)
+
+
+def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject:
+    retval: Union[None, RectangleObject, IndirectObject] = self.get(name)
+    if isinstance(retval, RectangleObject):
+        return retval
+    if retval is None:
+        for d in defaults:
+            retval = self.get(d)
+            if retval is not None:
+                break
+    if isinstance(retval, IndirectObject):
+        retval = self.pdf.get_object(retval)
+    retval = RectangleObject(retval)  # type: ignore
+    _set_rectangle(self, name, retval)
+    return retval
+
+
+def getRectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject:
+    deprecate_no_replacement("getRectangle")
+    return _get_rectangle(self, name, defaults)
+
+
+def _set_rectangle(self: Any, name: str, value: Union[RectangleObject, float]) -> None:
+    if not isinstance(name, NameObject):
+        name = NameObject(name)
+    self[name] = value
+
+
+def setRectangle(self: Any, name: str, value: Union[RectangleObject, float]) -> None:
+    deprecate_no_replacement("setRectangle")
+    _set_rectangle(self, name, value)
+
+
+def _delete_rectangle(self: Any, name: str) -> None:
+    del self[name]
+
+
+def deleteRectangle(self: Any, name: str) -> None:
+    deprecate_no_replacement("deleteRectangle")
+    del self[name]
+
+
+def _create_rectangle_accessor(name: str, fallback: Iterable[str]) -> property:
+    return property(
+        lambda self: _get_rectangle(self, name, fallback),
+        lambda self, value: _set_rectangle(self, name, value),
+        lambda self: _delete_rectangle(self, name),
+    )
+
+
+def createRectangleAccessor(name: str, fallback: Iterable[str]) -> property:
+    deprecate_no_replacement("createRectangleAccessor")
+    return _create_rectangle_accessor(name, fallback)
+
+
+class Transformation:
+    """
+    Specify a 2D transformation.
+
+    The transformation between two coordinate systems is represented by a 3-by-3
+    transformation matrix written as follows::
+
+        a b 0
+        c d 0
+        e f 1
+
+    Because a transformation matrix has only six elements that can be changed,
+    it is usually specified in PDF as the six-element array [ a b c d e f ].
+
+    Coordinate transformations are expressed as matrix multiplications::
+
+                                 a b 0
+     [ x′ y′ 1 ] = [ x y 1 ] ×   c d 0
+                                 e f 1
+
+
+    Usage
+    -----
+
+        >>> from PyPDF2 import Transformation
+        >>> op = Transformation().scale(sx=2, sy=3).translate(tx=10, ty=20)
+        >>> page.add_transformation(op)
+    """
+
+    # 9.5.4 Coordinate Systems for 3D
+    # 4.2.2 Common Transformations
+    def __init__(self, ctm: CompressedTransformationMatrix = (1, 0, 0, 1, 0, 0)):
+        self.ctm = ctm
+
+    @property
+    def matrix(self) -> TransformationMatrixType:
+        return (
+            (self.ctm[0], self.ctm[1], 0),
+            (self.ctm[2], self.ctm[3], 0),
+            (self.ctm[4], self.ctm[5], 1),
+        )
+
+    @staticmethod
+    def compress(matrix: TransformationMatrixType) -> CompressedTransformationMatrix:
+        return (
+            matrix[0][0],
+            matrix[0][1],
+            matrix[1][0],
+            matrix[1][1],
+            matrix[0][2],
+            matrix[1][2],
+        )
+
+    def translate(self, tx: float = 0, ty: float = 0) -> "Transformation":
+        m = self.ctm
+        return Transformation(ctm=(m[0], m[1], m[2], m[3], m[4] + tx, m[5] + ty))
+
+    def scale(
+        self, sx: Optional[float] = None, sy: Optional[float] = None
+    ) -> "Transformation":
+        if sx is None and sy is None:
+            raise ValueError("Either sx or sy must be specified")
+        if sx is None:
+            sx = sy
+        if sy is None:
+            sy = sx
+        assert sx is not None
+        assert sy is not None
+        op: TransformationMatrixType = ((sx, 0, 0), (0, sy, 0), (0, 0, 1))
+        ctm = Transformation.compress(matrix_multiply(self.matrix, op))
+        return Transformation(ctm)
+
+    def rotate(self, rotation: float) -> "Transformation":
+        rotation = math.radians(rotation)
+        op: TransformationMatrixType = (
+            (math.cos(rotation), math.sin(rotation), 0),
+            (-math.sin(rotation), math.cos(rotation), 0),
+            (0, 0, 1),
+        )
+        ctm = Transformation.compress(matrix_multiply(self.matrix, op))
+        return Transformation(ctm)
+
+    def __repr__(self) -> str:
+        return f"Transformation(ctm={self.ctm})"
+
+
+class PageObject(DictionaryObject):
+    """
+    PageObject represents a single page within a PDF file.
+
+    Typically this object will be created by accessing the
+    :meth:`get_page()<PyPDF2.PdfReader.get_page>` method of the
+    :class:`PdfReader<PyPDF2.PdfReader>` class, but it is
+    also possible to create an empty page with the
+    :meth:`create_blank_page()<PyPDF2._page.PageObject.create_blank_page>` static method.
+
+    :param pdf: PDF file the page belongs to.
+    :param indirect_ref: Stores the original indirect reference to
+        this object in its source PDF
+    """
+
+    def __init__(
+        self,
+        pdf: Optional[Any] = None,  # PdfReader
+        indirect_ref: Optional[IndirectObject] = None,
+    ) -> None:
+        from ._reader import PdfReader
+
+        DictionaryObject.__init__(self)
+        self.pdf: Optional[PdfReader] = pdf
+        self.indirect_ref = indirect_ref
+
+    @staticmethod
+    def create_blank_page(
+        pdf: Optional[Any] = None,  # PdfReader
+        width: Union[float, Decimal, None] = None,
+        height: Union[float, Decimal, None] = None,
+    ) -> "PageObject":
+        """
+        Return a new blank page.
+
+        If ``width`` or ``height`` is ``None``, try to get the page size
+        from the last page of *pdf*.
+
+        :param pdf: PDF file the page belongs to
+        :param float width: The width of the new page expressed in default user
+            space units.
+        :param float height: The height of the new page expressed in default user
+            space units.
+        :return: the new blank page:
+        :rtype: :class:`PageObject<PageObject>`
+        :raises PageSizeNotDefinedError: if ``pdf`` is ``None`` or contains
+            no page
+        """
+        page = PageObject(pdf)
+
+        # Creates a new page (cf PDF Reference  7.7.3.3)
+        page.__setitem__(NameObject(PG.TYPE), NameObject("/Page"))
+        page.__setitem__(NameObject(PG.PARENT), NullObject())
+        page.__setitem__(NameObject(PG.RESOURCES), DictionaryObject())
+        if width is None or height is None:
+            if pdf is not None and len(pdf.pages) > 0:
+                lastpage = pdf.pages[len(pdf.pages) - 1]
+                width = lastpage.mediabox.width
+                height = lastpage.mediabox.height
+            else:
+                raise PageSizeNotDefinedError
+        page.__setitem__(
+            NameObject(PG.MEDIABOX), RectangleObject((0, 0, width, height))  # type: ignore
+        )
+
+        return page
+
+    @staticmethod
+    def createBlankPage(
+        pdf: Optional[Any] = None,  # PdfReader
+        width: Union[float, Decimal, None] = None,
+        height: Union[float, Decimal, None] = None,
+    ) -> "PageObject":  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :meth:`create_blank_page` instead.
+        """
+        deprecate_with_replacement("createBlankPage", "create_blank_page")
+        return PageObject.create_blank_page(pdf, width, height)
+
+    def rotate(self, angle: float) -> "PageObject":
+        """
+        Rotate a page clockwise by increments of 90 degrees.
+
+        :param int angle: Angle to rotate the page.  Must be an increment
+            of 90 deg.
+        """
+        if angle % 90 != 0:
+            raise ValueError("Rotation angle must be a multiple of 90")
+        rotate_obj = self.get(PG.ROTATE, 0)
+        current_angle = (
+            rotate_obj if isinstance(rotate_obj, int) else rotate_obj.get_object()
+        )
+        self[NameObject(PG.ROTATE)] = NumberObject(current_angle + angle)
+        return self
+
+    def rotate_clockwise(self, angle: float) -> "PageObject":  # pragma: no cover
+        deprecate_with_replacement("rotate_clockwise", "rotate")
+        return self.rotate(angle)
+
+    def rotateClockwise(self, angle: float) -> "PageObject":  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :meth:`rotate_clockwise` instead.
+        """
+        deprecate_with_replacement("rotateClockwise", "rotate")
+        return self.rotate(angle)
+
+    def rotateCounterClockwise(self, angle: float) -> "PageObject":  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :meth:`rotate_clockwise` with a negative argument instead.
+        """
+        deprecate_with_replacement("rotateCounterClockwise", "rotate")
+        return self.rotate(-angle)
+
+    @staticmethod
+    def _merge_resources(
+        res1: DictionaryObject, res2: DictionaryObject, resource: Any
+    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        new_res = DictionaryObject()
+        new_res.update(res1.get(resource, DictionaryObject()).get_object())
+        page2res = cast(
+            DictionaryObject, res2.get(resource, DictionaryObject()).get_object()
+        )
+        rename_res = {}
+        for key in list(page2res.keys()):
+            if key in new_res and new_res.raw_get(key) != page2res.raw_get(key):
+                newname = NameObject(key + str(uuid.uuid4()))
+                rename_res[key] = newname
+                new_res[newname] = page2res[key]
+            elif key not in new_res:
+                new_res[key] = page2res.raw_get(key)
+        return new_res, rename_res
+
+    @staticmethod
+    def _content_stream_rename(
+        stream: ContentStream, rename: Dict[Any, Any], pdf: Any  # PdfReader
+    ) -> ContentStream:
+        if not rename:
+            return stream
+        stream = ContentStream(stream, pdf)
+        for operands, _operator in stream.operations:
+            if isinstance(operands, list):
+                for i in range(len(operands)):
+                    op = operands[i]
+                    if isinstance(op, NameObject):
+                        operands[i] = rename.get(op, op)
+            elif isinstance(operands, dict):
+                for i in operands:
+                    op = operands[i]
+                    if isinstance(op, NameObject):
+                        operands[i] = rename.get(op, op)
+            else:
+                raise KeyError("type of operands is %s" % type(operands))
+        return stream
+
+    @staticmethod
+    def _push_pop_gs(contents: Any, pdf: Any) -> ContentStream:  # PdfReader
+        # adds a graphics state "push" and "pop" to the beginning and end
+        # of a content stream.  This isolates it from changes such as
+        # transformation matricies.
+        stream = ContentStream(contents, pdf)
+        stream.operations.insert(0, ([], "q"))
+        stream.operations.append(([], "Q"))
+        return stream
+
+    @staticmethod
+    def _add_transformation_matrix(
+        contents: Any, pdf: Any, ctm: CompressedTransformationMatrix
+    ) -> ContentStream:  # PdfReader
+        # adds transformation matrix at the beginning of the given
+        # contents stream.
+        a, b, c, d, e, f = ctm
+        contents = ContentStream(contents, pdf)
+        contents.operations.insert(
+            0,
+            [
+                [
+                    FloatObject(a),
+                    FloatObject(b),
+                    FloatObject(c),
+                    FloatObject(d),
+                    FloatObject(e),
+                    FloatObject(f),
+                ],
+                " cm",
+            ],
+        )
+        return contents
+
+    def get_contents(self) -> Optional[ContentStream]:
+        """
+        Access the page contents.
+
+        :return: the ``/Contents`` object, or ``None`` if it doesn't exist.
+            ``/Contents`` is optional, as described in PDF Reference  7.7.3.3
+        """
+        if PG.CONTENTS in self:
+            return self[PG.CONTENTS].get_object()  # type: ignore
+        else:
+            return None
+
+    def getContents(self) -> Optional[ContentStream]:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :meth:`get_contents` instead.
+        """
+        deprecate_with_replacement("getContents", "get_contents")
+        return self.get_contents()
+
+    def merge_page(self, page2: "PageObject", expand: bool = False) -> None:
+        """
+        Merge the content streams of two pages into one.
+
+        Resource references
+        (i.e. fonts) are maintained from both pages.  The mediabox/cropbox/etc
+        of this page are not altered.  The parameter page's content stream will
+        be added to the end of this page's content stream, meaning that it will
+        be drawn after, or "on top" of this page.
+
+        :param PageObject page2: The page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param bool expand: If true, the current page dimensions will be
+            expanded to accommodate the dimensions of the page to be merged.
+        """
+        self._merge_page(page2, expand=expand)
+
+    def mergePage(self, page2: "PageObject") -> None:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :meth:`merge_page` instead.
+        """
+        deprecate_with_replacement("mergePage", "merge_page")
+        return self.merge_page(page2)
+
+    def _merge_page(
+        self,
+        page2: "PageObject",
+        page2transformation: Optional[Callable[[Any], ContentStream]] = None,
+        ctm: Optional[CompressedTransformationMatrix] = None,
+        expand: bool = False,
+    ) -> None:
+        # First we work on merging the resource dictionaries.  This allows us
+        # to find out what symbols in the content streams we might need to
+        # rename.
+
+        new_resources = DictionaryObject()
+        rename = {}
+        original_resources = cast(DictionaryObject, self[PG.RESOURCES].get_object())
+        page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object())
+        new_annots = ArrayObject()
+
+        for page in (self, page2):
+            if PG.ANNOTS in page:
+                annots = page[PG.ANNOTS]
+                if isinstance(annots, ArrayObject):
+                    for ref in annots:
+                        new_annots.append(ref)
+
+        for res in (
+            RES.EXT_G_STATE,
+            RES.FONT,
+            RES.XOBJECT,
+            RES.COLOR_SPACE,
+            RES.PATTERN,
+            RES.SHADING,
+            RES.PROPERTIES,
+        ):
+            new, newrename = PageObject._merge_resources(
+                original_resources, page2resources, res
+            )
+            if new:
+                new_resources[NameObject(res)] = new
+                rename.update(newrename)
+
+        # Combine /ProcSet sets.
+        new_resources[NameObject(RES.PROC_SET)] = ArrayObject(
+            frozenset(
+                original_resources.get(RES.PROC_SET, ArrayObject()).get_object()  # type: ignore
+            ).union(
+                frozenset(page2resources.get(RES.PROC_SET, ArrayObject()).get_object())  # type: ignore
+            )
+        )
+
+        new_content_array = ArrayObject()
+
+        original_content = self.get_contents()
+        if original_content is not None:
+            new_content_array.append(
+                PageObject._push_pop_gs(original_content, self.pdf)
+            )
+
+        page2content = page2.get_contents()
+        if page2content is not None:
+            page2content = ContentStream(page2content, self.pdf)
+            page2content.operations.insert(
+                0,
+                (
+                    map(
+                        FloatObject,
+                        [
+                            page2.trimbox.left,
+                            page2.trimbox.bottom,
+                            page2.trimbox.width,
+                            page2.trimbox.height,
+                        ],
+                    ),
+                    "re",
+                ),
+            )
+            page2content.operations.insert(1, ([], "W"))
+            page2content.operations.insert(2, ([], "n"))
+            if page2transformation is not None:
+                page2content = page2transformation(page2content)
+            page2content = PageObject._content_stream_rename(
+                page2content, rename, self.pdf
+            )
+            page2content = PageObject._push_pop_gs(page2content, self.pdf)
+            new_content_array.append(page2content)
+
+        # if expanding the page to fit a new page, calculate the new media box size
+        if expand:
+            self._expand_mediabox(page2, ctm)
+
+        self[NameObject(PG.CONTENTS)] = ContentStream(new_content_array, self.pdf)
+        self[NameObject(PG.RESOURCES)] = new_resources
+        self[NameObject(PG.ANNOTS)] = new_annots
+
+    def _expand_mediabox(
+        self, page2: "PageObject", ctm: Optional[CompressedTransformationMatrix]
+    ) -> None:
+        corners1 = (
+            self.mediabox.left.as_numeric(),
+            self.mediabox.bottom.as_numeric(),
+            self.mediabox.right.as_numeric(),
+            self.mediabox.top.as_numeric(),
+        )
+        corners2 = (
+            page2.mediabox.left.as_numeric(),
+            page2.mediabox.bottom.as_numeric(),
+            page2.mediabox.left.as_numeric(),
+            page2.mediabox.top.as_numeric(),
+            page2.mediabox.right.as_numeric(),
+            page2.mediabox.top.as_numeric(),
+            page2.mediabox.right.as_numeric(),
+            page2.mediabox.bottom.as_numeric(),
+        )
+        if ctm is not None:
+            ctm = tuple(float(x) for x in ctm)  # type: ignore[assignment]
+            new_x = tuple(
+                ctm[0] * corners2[i] + ctm[2] * corners2[i + 1] + ctm[4]
+                for i in range(0, 8, 2)
+            )
+            new_y = tuple(
+                ctm[1] * corners2[i] + ctm[3] * corners2[i + 1] + ctm[5]
+                for i in range(0, 8, 2)
+            )
+        else:
+            new_x = corners2[0:8:2]
+            new_y = corners2[1:8:2]
+        lowerleft = (min(new_x), min(new_y))
+        upperright = (max(new_x), max(new_y))
+        lowerleft = (min(corners1[0], lowerleft[0]), min(corners1[1], lowerleft[1]))
+        upperright = (
+            max(corners1[2], upperright[0]),
+            max(corners1[3], upperright[1]),
+        )
+
+        self.mediabox.lower_left = lowerleft
+        self.mediabox.upper_right = upperright
+
+    def mergeTransformedPage(
+        self,
+        page2: "PageObject",
+        ctm: Union[CompressedTransformationMatrix, Transformation],
+        expand: bool = False,
+    ) -> None:  # pragma: no cover
+        """
+        mergeTransformedPage is similar to merge_page, but a transformation
+        matrix is applied to the merged stream.
+
+        :param PageObject page2: The page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param tuple ctm: a 6-element tuple containing the operands of the
+            transformation matrix
+        :param bool expand: Whether the page should be expanded to fit the dimensions
+            of the page to be merged.
+
+        .. deprecated:: 1.28.0
+
+            Use :meth:`add_transformation`  and :meth:`merge_page` instead.
+        """
+        deprecate_with_replacement(
+            "page.mergeTransformedPage(page2, ctm)",
+            "page2.add_transformation(ctm); page.merge_page(page2)",
+        )
+        if isinstance(ctm, Transformation):
+            ctm = ctm.ctm
+        ctm = cast(CompressedTransformationMatrix, ctm)
+        self._merge_page(
+            page2,
+            lambda page2Content: PageObject._add_transformation_matrix(
+                page2Content, page2.pdf, ctm  # type: ignore[arg-type]
+            ),
+            ctm,
+            expand,
+        )
+
+    def mergeScaledPage(
+        self, page2: "PageObject", scale: float, expand: bool = False
+    ) -> None:  # pragma: no cover
+        """
+        mergeScaledPage is similar to merge_page, but the stream to be merged
+        is scaled by appling a transformation matrix.
+
+        :param PageObject page2: The page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float scale: The scaling factor
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+
+        .. deprecated:: 1.28.0
+
+            Use :meth:`add_transformation` and :meth:`merge_page` instead.
+        """
+        deprecate_with_replacement(
+            "page.mergeScaledPage(page2, scale, expand)",
+            "page2.add_transformation(Transformation().scale(scale)); page.merge_page(page2, expand)",
+        )
+        op = Transformation().scale(scale, scale)
+        self.mergeTransformedPage(page2, op, expand)
+
+    def mergeRotatedPage(
+        self, page2: "PageObject", rotation: float, expand: bool = False
+    ) -> None:  # pragma: no cover
+        """
+        mergeRotatedPage is similar to merge_page, but the stream to be merged
+        is rotated by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float rotation: The angle of the rotation, in degrees
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+
+        .. deprecated:: 1.28.0
+
+            Use :meth:`add_transformation` and :meth:`merge_page` instead.
+        """
+        deprecate_with_replacement(
+            "page.mergeRotatedPage(page2, rotation, expand)",
+            "page2.add_transformation(Transformation().rotate(rotation)); page.merge_page(page2, expand)",
+        )
+        op = Transformation().rotate(rotation)
+        self.mergeTransformedPage(page2, op, expand)
+
+    def mergeTranslatedPage(
+        self, page2: "PageObject", tx: float, ty: float, expand: bool = False
+    ) -> None:  # pragma: no cover
+        """
+        mergeTranslatedPage is similar to merge_page, but the stream to be
+        merged is translated by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float tx: The translation on X axis
+        :param float ty: The translation on Y axis
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+
+        .. deprecated:: 1.28.0
+
+            Use :meth:`add_transformation` and :meth:`merge_page` instead.
+        """
+        deprecate_with_replacement(
+            "page.mergeTranslatedPage(page2, tx, ty, expand)",
+            "page2.add_transformation(Transformation().translate(tx, ty)); page.merge_page(page2, expand)",
+        )
+        op = Transformation().translate(tx, ty)
+        self.mergeTransformedPage(page2, op, expand)
+
+    def mergeRotatedTranslatedPage(
+        self,
+        page2: "PageObject",
+        rotation: float,
+        tx: float,
+        ty: float,
+        expand: bool = False,
+    ) -> None:  # pragma: no cover
+        """
+        mergeRotatedTranslatedPage is similar to merge_page, but the stream to
+        be merged is rotated and translated by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float tx: The translation on X axis
+        :param float ty: The translation on Y axis
+        :param float rotation: The angle of the rotation, in degrees
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+
+        .. deprecated:: 1.28.0
+
+            Use :meth:`add_transformation` and :meth:`merge_page` instead.
+        """
+        deprecate_with_replacement(
+            "page.mergeRotatedTranslatedPage(page2, rotation, tx, ty, expand)",
+            "page2.add_transformation(Transformation().rotate(rotation).translate(tx, ty)); page.merge_page(page2, expand)",
+        )
+        op = Transformation().translate(-tx, -ty).rotate(rotation).translate(tx, ty)
+        return self.mergeTransformedPage(page2, op, expand)
+
+    def mergeRotatedScaledPage(
+        self, page2: "PageObject", rotation: float, scale: float, expand: bool = False
+    ) -> None:  # pragma: no cover
+        """
+        mergeRotatedScaledPage is similar to merge_page, but the stream to be
+        merged is rotated and scaled by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float rotation: The angle of the rotation, in degrees
+        :param float scale: The scaling factor
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+
+        .. deprecated:: 1.28.0
+
+            Use :meth:`add_transformation` and :meth:`merge_page` instead.
+        """
+        deprecate_with_replacement(
+            "page.mergeRotatedScaledPage(page2, rotation, scale, expand)",
+            "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); page.merge_page(page2, expand)",
+        )
+        op = Transformation().rotate(rotation).scale(scale, scale)
+        self.mergeTransformedPage(page2, op, expand)
+
+    def mergeScaledTranslatedPage(
+        self,
+        page2: "PageObject",
+        scale: float,
+        tx: float,
+        ty: float,
+        expand: bool = False,
+    ) -> None:  # pragma: no cover
+        """
+        mergeScaledTranslatedPage is similar to merge_page, but the stream to be
+        merged is translated and scaled by appling a transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float scale: The scaling factor
+        :param float tx: The translation on X axis
+        :param float ty: The translation on Y axis
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+
+        .. deprecated:: 1.28.0
+
+            Use :meth:`add_transformation` and :meth:`merge_page` instead.
+        """
+        deprecate_with_replacement(
+            "page.mergeScaledTranslatedPage(page2, scale, tx, ty, expand)",
+            "page2.add_transformation(Transformation().scale(scale).translate(tx, ty)); page.merge_page(page2, expand)",
+        )
+        op = Transformation().scale(scale, scale).translate(tx, ty)
+        return self.mergeTransformedPage(page2, op, expand)
+
+    def mergeRotatedScaledTranslatedPage(
+        self,
+        page2: "PageObject",
+        rotation: float,
+        scale: float,
+        tx: float,
+        ty: float,
+        expand: bool = False,
+    ) -> None:  # pragma: no cover
+        """
+        mergeRotatedScaledTranslatedPage is similar to merge_page, but the
+        stream to be merged is translated, rotated and scaled by appling a
+        transformation matrix.
+
+        :param PageObject page2: the page to be merged into this one. Should be
+            an instance of :class:`PageObject<PageObject>`.
+        :param float tx: The translation on X axis
+        :param float ty: The translation on Y axis
+        :param float rotation: The angle of the rotation, in degrees
+        :param float scale: The scaling factor
+        :param bool expand: Whether the page should be expanded to fit the
+            dimensions of the page to be merged.
+
+        .. deprecated:: 1.28.0
+
+            Use :meth:`add_transformation` and :meth:`merge_page` instead.
+        """
+        deprecate_with_replacement(
+            "page.mergeRotatedScaledTranslatedPage(page2, rotation, tx, ty, expand)",
+            "page2.add_transformation(Transformation().rotate(rotation).scale(scale)); page.merge_page(page2, expand)",
+        )
+        op = Transformation().rotate(rotation).scale(scale, scale).translate(tx, ty)
+        self.mergeTransformedPage(page2, op, expand)
+
+    def add_transformation(
+        self,
+        ctm: Union[Transformation, CompressedTransformationMatrix],
+        expand: bool = False,
+    ) -> None:
+        """
+        Apply a transformation matrix to the page.
+
+        :param tuple ctm: A 6-element tuple containing the operands of the
+            transformation matrix. Alternatively, a
+            :py:class:`Transformation<PyPDF2.Transformation>`
+            object can be passed.
+
+        See :doc:`/user/cropping-and-transforming`.
+        """
+        if isinstance(ctm, Transformation):
+            ctm = ctm.ctm
+        content = self.get_contents()
+        if content is not None:
+            content = PageObject._add_transformation_matrix(content, self.pdf, ctm)
+            content = PageObject._push_pop_gs(content, self.pdf)
+            self[NameObject(PG.CONTENTS)] = content
+        # if expanding the page to fit a new page, calculate the new media box size
+        if expand:
+            corners = [
+                self.mediabox.left.as_numeric(),
+                self.mediabox.bottom.as_numeric(),
+                self.mediabox.left.as_numeric(),
+                self.mediabox.top.as_numeric(),
+                self.mediabox.right.as_numeric(),
+                self.mediabox.top.as_numeric(),
+                self.mediabox.right.as_numeric(),
+                self.mediabox.bottom.as_numeric(),
+            ]
+
+            ctm = tuple(float(x) for x in ctm)  # type: ignore[assignment]
+            new_x = [
+                ctm[0] * corners[i] + ctm[2] * corners[i + 1] + ctm[4]
+                for i in range(0, 8, 2)
+            ]
+            new_y = [
+                ctm[1] * corners[i] + ctm[3] * corners[i + 1] + ctm[5]
+                for i in range(0, 8, 2)
+            ]
+
+            lowerleft = (min(new_x), min(new_y))
+            upperright = (max(new_x), max(new_y))
+            lowerleft = (min(corners[0], lowerleft[0]), min(corners[1], lowerleft[1]))
+            upperright = (
+                max(corners[2], upperright[0]),
+                max(corners[3], upperright[1]),
+            )
+
+            self.mediabox.lower_left = lowerleft
+            self.mediabox.upper_right = upperright
+
+    def addTransformation(
+        self, ctm: CompressedTransformationMatrix
+    ) -> None:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :meth:`add_transformation` instead.
+        """
+        deprecate_with_replacement("addTransformation", "add_transformation")
+        self.add_transformation(ctm)
+
+    def scale(self, sx: float, sy: float) -> None:
+        """
+        Scale a page by the given factors by appling a transformation
+        matrix to its content and updating the page size.
+
+        :param float sx: The scaling factor on horizontal axis.
+        :param float sy: The scaling factor on vertical axis.
+        """
+        self.add_transformation((sx, 0, 0, sy, 0, 0))
+        self.mediabox = RectangleObject(
+            (
+                float(self.mediabox.left) * sx,
+                float(self.mediabox.bottom) * sy,
+                float(self.mediabox.right) * sx,
+                float(self.mediabox.top) * sy,
+            )
+        )
+        if PG.VP in self:
+            viewport = self[PG.VP]
+            if isinstance(viewport, ArrayObject):
+                bbox = viewport[0]["/BBox"]
+            else:
+                bbox = viewport["/BBox"]  # type: ignore
+            scaled_bbox = RectangleObject(
+                (
+                    float(bbox[0]) * sx,
+                    float(bbox[1]) * sy,
+                    float(bbox[2]) * sx,
+                    float(bbox[3]) * sy,
+                )
+            )
+            if isinstance(viewport, ArrayObject):
+                self[NameObject(PG.VP)][NumberObject(0)][  # type: ignore
+                    NameObject("/BBox")
+                ] = scaled_bbox
+            else:
+                self[NameObject(PG.VP)][NameObject("/BBox")] = scaled_bbox  # type: ignore
+
+    def scale_by(self, factor: float) -> None:
+        """
+        Scale a page by the given factor by appling a transformation
+        matrix to its content and updating the page size.
+
+        :param float factor: The scaling factor (for both X and Y axis).
+        """
+        self.scale(factor, factor)
+
+    def scaleBy(self, factor: float) -> None:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :meth:`scale_by` instead.
+        """
+        deprecate_with_replacement("scaleBy", "scale_by")
+        self.scale(factor, factor)
+
+    def scale_to(self, width: float, height: float) -> None:
+        """
+        Scale a page to the specified dimentions by appling a
+        transformation matrix to its content and updating the page size.
+
+        :param float width: The new width.
+        :param float height: The new heigth.
+        """
+        sx = width / float(self.mediabox.width)
+        sy = height / float(self.mediabox.height)
+        self.scale(sx, sy)
+
+    def scaleTo(self, width: float, height: float) -> None:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :meth:`scale_to` instead.
+        """
+        deprecate_with_replacement("scaleTo", "scale_to")
+        self.scale_to(width, height)
+
+    def compress_content_streams(self) -> None:
+        """
+        Compress the size of this page by joining all content streams and
+        applying a FlateDecode filter.
+
+        However, it is possible that this function will perform no action if
+        content stream compression becomes "automatic" for some reason.
+        """
+        content = self.get_contents()
+        if content is not None:
+            if not isinstance(content, ContentStream):
+                content = ContentStream(content, self.pdf)
+            self[NameObject(PG.CONTENTS)] = content.flate_encode()
+
+    def compressContentStreams(self) -> None:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :meth:`compress_content_streams` instead.
+        """
+        deprecate_with_replacement("compressContentStreams", "compress_content_streams")
+        self.compress_content_streams()
+
+    def _extract_text_old(
+        self, Tj_sep: str = "", TJ_sep: str = ""
+    ) -> str:  # pragma: no cover
+        """
+        Locate all text drawing commands, in the order they are provided in the
+        content stream, and extract the text.  This works well for some PDF
+        files, but poorly for others, depending on the generator used.  This will
+        be refined in the future.  Do not rely on the order of text coming out of
+        this function, as it will change if this function is made more
+        sophisticated.
+
+        :return: a string object.
+        """
+        text = ""
+        content = self[PG.CONTENTS].get_object()
+        if not isinstance(content, ContentStream):
+            content = ContentStream(content, self.pdf)
+        # Note: we check all strings are TextStringObjects.  ByteStringObjects
+        # are strings where the byte->string encoding was unknown, so adding
+        # them to the text here would be gibberish.
+
+        space_scale = 1.0
+
+        for operands, operator in content.operations:
+            # Missing operators:
+            #   Tf: text font
+            #  Tfs: text font size
+            #   Tc: '5.2.1 Character Spacing'
+            #   Th: '5.2.3 Horizontal Scaling'
+            #   Tl: '5.2.4 Leading'
+            # Tmode: '5.2.5 Text Rendering Mode'
+            # Trise: '5.2.6 Text Rise'
+
+            if operator in [b"Tf", b"Tfs", b"Tc", b"Th", b"Tl", b"Tmode"]:
+                pass
+            elif operator == b"Tw":  # word spacing
+                # See '5.2.2 Word Spacing'
+                space_scale = 1.0 + float(operands[0])
+            elif operator == b"Tj":
+                # See 'TABLE 5.6 Text-showing operators'
+                _text = operands[0]
+                if isinstance(_text, TextStringObject):
+                    text += Tj_sep
+                    text += _text
+                    text += "\n"
+            elif operator == b"T*":
+                # See 'TABLE 5.5 Text-positioning operators'
+                text += "\n"
+            elif operator == b"'":
+                # See 'TABLE 5.6 Text-showing operators'
+                text += "\n"
+                _text = operands[0]
+                if isinstance(_text, TextStringObject):
+                    text += operands[0]
+            elif operator == b'"':
+                # See 'TABLE 5.6 Text-showing operators'
+                _text = operands[2]
+                if isinstance(_text, TextStringObject):
+                    text += "\n"
+                    text += _text
+            elif operator == b"TJ":
+                # See 'TABLE 5.6 Text-showing operators'
+                for i in operands[0]:
+                    if isinstance(i, TextStringObject):
+                        text += TJ_sep
+                        text += i
+                    elif isinstance(i, (NumberObject, FloatObject)):
+                        # a positive value decreases and the negative value increases
+                        # space
+                        if int(i) < -space_scale * 250:
+                            if len(text) == 0 or text[-1] != " ":
+                                text += " "
+                        else:
+                            if len(text) > 1 and text[-1] == " ":
+                                text = text[:-1]
+                text += "\n"
+        return text
+
+    def _debug_for_extract(self) -> str:
+        out = ""
+        for ope, op in ContentStream(
+            self["/Contents"].getObject(), self.pdf, "bytes"
+        ).operations:
+            if op == b"TJ":
+                s = [x for x in ope[0] if isinstance(x, str)]
+            else:
+                s = []
+            out += op.decode("utf-8") + " " + "".join(s) + ope.__repr__() + "\n"
+        out += "\n=============================\n"
+        try:
+            for fo in self["/Resources"]["/Font"]:  # type:ignore
+                out += fo + "\n"
+                out += self["/Resources"]["/Font"][fo].__repr__() + "\n"  # type:ignore
+                try:
+                    enc_repr = self["/Resources"]["/Font"][fo][  # type:ignore
+                        "/Encoding"
+                    ].__repr__()
+                    out += enc_repr + "\n"
+                except Exception:
+                    pass
+        except KeyError:
+            out += "No Font\n"
+        return out
+
+    def _extract_text(
+        self,
+        obj: Any,
+        pdf: Any,
+        space_width: float = 200.0,
+        content_key: Optional[str] = PG.CONTENTS,
+    ) -> str:
+        """
+        Locate all text drawing commands, in the order they are provided in the
+        content stream, and extract the text.  This works well for some PDF
+        files, but poorly for others, depending on the generator used.  This will
+        be refined in the future.  Do not rely on the order of text coming out of
+        this function, as it will change if this function is made more
+        sophisticated.
+
+        :param float space_width: force default space width
+                    (if not extracted from font (default 200)
+        :param Optional[str] content_key: indicate the default key where to extract data
+            None = the opbject; this allow to reuse the function on XObject
+            default = "/Content"
+        :return: a string object.
+        """
+
+        text: str = ""
+        output: str = ""
+        cmaps: Dict[
+            str, Tuple[str, float, Union[str, Dict[int, str]], Dict[str, str]]
+        ] = {}
+        resources_dict = cast(DictionaryObject, obj["/Resources"])
+        if "/Font" in resources_dict:
+            for f in cast(DictionaryObject, resources_dict["/Font"]):
+                cmaps[f] = build_char_map(f, space_width, obj)
+        cmap: Tuple[
+            Union[str, Dict[int, str]], Dict[str, str], str
+        ]  # (encoding,CMAP,font_name)
+        try:
+            content = (
+                obj[content_key].get_object() if isinstance(content_key, str) else obj
+            )
+            if not isinstance(content, ContentStream):
+                content = ContentStream(content, pdf, "bytes")
+        except KeyError:  # it means no content can be extracted(certainly empty page)
+            return ""
+        # Note: we check all strings are TextStringObjects.  ByteStringObjects
+        # are strings where the byte->string encoding was unknown, so adding
+        # them to the text here would be gibberish.
+
+        tm_matrix: List[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
+        tm_prev: List[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
+        char_scale = 1.0
+        space_scale = 1.0
+        _space_width: float = 500.0  # will be set correctly at first Tf
+        TL = 0.0
+        font_size = 12.0  # init just in case of
+
+        # tm_matrix: Tuple = tm_matrix, output: str = output, text: str = text,
+        # char_scale: float = char_scale,space_scale : float = space_scale, _space_width: float = _space_width,
+        # TL: float = TL, font_size: float = font_size, cmap = cmap
+
+        def process_operation(operator: bytes, operands: List) -> None:
+            nonlocal tm_matrix, tm_prev, output, text, char_scale, space_scale, _space_width, TL, font_size, cmap
+            if tm_matrix[4] != 0 and tm_matrix[5] != 0:  # o reuse of the
+                tm_prev = list(tm_matrix)
+            # Table 5.4 page 405
+            if operator == b"BT":
+                tm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
+                # tm_prev = tm_matrix
+                output += text
+                # based
+                # if output != "" and output[-1]!="\n":
+                #    output += "\n"
+                text = ""
+                return None
+            elif operator == b"ET":
+                output += text
+                text = ""
+            # Table 5.2 page 398
+            elif operator == b"Tz":
+                char_scale = float(operands[0]) / 100.0
+            elif operator == b"Tw":
+                space_scale = 1.0 + float(operands[0])
+            elif operator == b"TL":
+                TL = float(operands[0])
+            elif operator == b"Tf":
+                if text != "":
+                    output += text  # .translate(cmap)
+                text = ""
+                try:
+                    _space_width = cmaps[operands[0]][1]
+                    cmap = (
+                        cmaps[operands[0]][2],
+                        cmaps[operands[0]][3],
+                        operands[0],
+                    )  # type:ignore
+                except KeyError:  # font not found
+                    _space_width = unknown_char_map[1]
+                    cmap = (
+                        unknown_char_map[2],
+                        unknown_char_map[3],
+                        "???" + operands[0],
+                    )
+                try:
+                    font_size = float(operands[1])
+                except Exception:
+                    pass  # keep previous size
+            # Table 5.5 page 406
+            elif operator == b"Td":
+                tm_matrix[5] += float(operands[1])
+                tm_matrix[4] += float(operands[0])
+            elif operator == b"Tm":
+                tm_matrix = [
+                    float(operands[0]),
+                    float(operands[1]),
+                    float(operands[2]),
+                    float(operands[3]),
+                    float(operands[4]),
+                    float(operands[5]),
+                ]
+            elif operator == b"T*":
+                tm_matrix[5] -= TL
+            elif operator == b"Tj":
+                t: str = ""
+                tt: bytes = (
+                    encode_pdfdocencoding(operands[0])
+                    if isinstance(operands[0], str)
+                    else operands[0]
+                )
+                if isinstance(cmap[0], str):
+                    t = tt.decode(cmap[0], "surrogatepass")  # apply str encoding
+                else:  # apply dict encoding
+                    t = "".join(
+                        [
+                            cmap[0][x] if x in cmap[0] else bytes((x,)).decode()
+                            for x in tt
+                        ]
+                    )
+
+                text += "".join([cmap[1][x] if x in cmap[1] else x for x in t])
+            else:
+                return None
+            # process text changes due to positionchange: " "
+            if tm_matrix[5] <= (
+                tm_prev[5]
+                - font_size  # remove scaling * sqrt(tm_matrix[2] ** 2 + tm_matrix[3] ** 2)
+            ):  # it means that we are moving down by one line
+                output += text + "\n"  # .translate(cmap) + "\n"
+                text = ""
+            elif tm_matrix[4] >= (
+                tm_prev[4] + space_scale * _space_width * char_scale
+            ):  # it means that we are moving down by one line
+                text += " "
+            return None
+            # for clarity Operator in (b"g",b"G") : nothing to do
+            # end of process_operation ######
+
+        for operands, operator in content.operations:
+            # multiple operators are defined in here ####
+            if operator == b"'":
+                process_operation(b"T*", [])
+                process_operation(b"Tj", operands)
+            elif operator == b'"':
+                process_operation(b"T*", [])
+                process_operation(b"TJ", operands)
+            elif operator == b"TD":
+                process_operation(b"TL", [-operands[1]])
+                process_operation(b"Td", operands)
+            elif operator == b"TJ":
+                for op in operands[0]:
+                    if isinstance(op, (str, bytes)):
+                        process_operation(b"Tj", [op])
+                    if isinstance(op, (int, float, NumberObject, FloatObject)):
+                        process_operation(b"Td", [-op, 0.0])
+            elif operator == b"Do":
+                output += text
+                if output != "":
+                    output += "\n"
+                try:
+                    xobj = resources_dict["/XObject"]  # type: ignore
+                    if xobj[operands[0]]["/Subtype"] != "/Image":  # type: ignore
+                        output += text
+                        text = self.extract_xform_text(xobj[operands[0]], space_width)  # type: ignore
+                        output += text
+                except Exception:
+                    warnings.warn(
+                        f" impossible to decode XFormObject {operands[0]}",
+                        PdfReadWarning,
+                    )
+                finally:
+                    text = ""
+            else:
+                process_operation(operator, operands)
+        output += text  # just in case of
+        return output
+
+    def extract_text(
+        self, Tj_sep: str = "", TJ_sep: str = "", space_width: float = 200.0
+    ) -> str:
+        """
+        Locate all text drawing commands, in the order they are provided in the
+        content stream, and extract the text.  This works well for some PDF
+        files, but poorly for others, depending on the generator used.  This will
+        be refined in the future.  Do not rely on the order of text coming out of
+        this function, as it will change if this function is made more
+        sophisticated.
+        space_width : float = force default space width (if not extracted from font (default 200)
+
+        :return: a string object.
+        """
+        return self._extract_text(self, self.pdf, space_width, PG.CONTENTS)
+
+    def extract_xform_text(
+        self, xform: EncodedStreamObject, space_width: float = 200.0
+    ) -> str:
+        """
+        Extraction tet from an XObject.
+        space_width : float = force default space width (if not extracted from font (default 200)
+
+        :return: a string object.
+        """
+        return self._extract_text(xform, self.pdf, space_width, None)
+
+    def extractText(
+        self, Tj_sep: str = "", TJ_sep: str = ""
+    ) -> str:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :meth:`extract_text` instead.
+        """
+        deprecate_with_replacement("extractText", "extract_text")
+        return self.extract_text(Tj_sep=Tj_sep, TJ_sep=TJ_sep)
+
+    mediabox = _create_rectangle_accessor(PG.MEDIABOX, ())
+    """
+    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
+    defining the boundaries of the physical medium on which the page is
+    intended to be displayed or printed.
+    """
+
+    @property
+    def mediaBox(self) -> RectangleObject:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :py:attr:`mediabox` instead.
+        """
+        deprecate_with_replacement("mediaBox", "mediabox")
+        return self.mediabox
+
+    @mediaBox.setter
+    def mediaBox(self, value: RectangleObject) -> None:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :py:attr:`mediabox` instead.
+        """
+        deprecate_with_replacement("mediaBox", "mediabox")
+        self.mediabox = value
+
+    cropbox = _create_rectangle_accessor("/CropBox", (PG.MEDIABOX,))
+    """
+    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
+    defining the visible region of default user space.  When the page is
+    displayed or printed, its contents are to be clipped (cropped) to this
+    rectangle and then imposed on the output medium in some
+    implementation-defined manner.  Default value: same as :attr:`mediabox<mediabox>`.
+    """
+
+    @property
+    def cropBox(self) -> RectangleObject:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :py:attr:`cropbox` instead.
+        """
+        deprecate_with_replacement("cropBox", "cropbox")
+        return self.cropbox
+
+    @cropBox.setter
+    def cropBox(self, value: RectangleObject) -> None:  # pragma: no cover
+        deprecate_with_replacement("cropBox", "cropbox")
+        self.cropbox = value
+
+    bleedbox = _create_rectangle_accessor("/BleedBox", ("/CropBox", PG.MEDIABOX))
+    """
+    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
+    defining the region to which the contents of the page should be clipped
+    when output in a production enviroment.
+    """
+
+    @property
+    def bleedBox(self) -> RectangleObject:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :py:attr:`bleedbox` instead.
+        """
+        deprecate_with_replacement("bleedBox", "bleedbox")
+        return self.bleedbox
+
+    @bleedBox.setter
+    def bleedBox(self, value: RectangleObject) -> None:  # pragma: no cover
+        deprecate_with_replacement("bleedBox", "bleedbox")
+        self.bleedbox = value
+
+    trimbox = _create_rectangle_accessor("/TrimBox", ("/CropBox", PG.MEDIABOX))
+    """
+    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
+    defining the intended dimensions of the finished page after trimming.
+    """
+
+    @property
+    def trimBox(self) -> RectangleObject:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :py:attr:`trimbox` instead.
+        """
+        deprecate_with_replacement("trimBox", "trimbox")
+        return self.trimbox
+
+    @trimBox.setter
+    def trimBox(self, value: RectangleObject) -> None:  # pragma: no cover
+        deprecate_with_replacement("trimBox", "trimbox")
+        self.trimbox = value
+
+    artbox = _create_rectangle_accessor("/ArtBox", ("/CropBox", PG.MEDIABOX))
+    """
+    A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
+    defining the extent of the page's meaningful content as intended by the
+    page's creator.
+    """
+
+    @property
+    def artBox(self) -> RectangleObject:  # pragma: no cover
+        """
+        .. deprecated:: 1.28.0
+
+            Use :py:attr:`artbox` instead.
+        """
+        deprecate_with_replacement("artBox", "artbox")
+        return self.artbox
+
+    @artBox.setter
+    def artBox(self, value: RectangleObject) -> None:  # pragma: no cover
+        deprecate_with_replacement("artBox", "artbox")
+        self.artbox = value
+
+
+class _VirtualList:
+    def __init__(
+        self,
+        length_function: Callable[[], int],
+        get_function: Callable[[int], PageObject],
+    ) -> None:
+        self.length_function = length_function
+        self.get_function = get_function
+        self.current = -1
+
+    def __len__(self) -> int:
+        return self.length_function()
+
+    def __getitem__(self, index: int) -> PageObject:
+        if isinstance(index, slice):
+            indices = range(*index.indices(len(self)))
+            cls = type(self)
+            return cls(indices.__len__, lambda idx: self[indices[idx]])
+        if not isinstance(index, int):
+            raise TypeError("sequence indices must be integers")
+        len_self = len(self)
+        if index < 0:
+            # support negative indexes
+            index = len_self + index
+        if index < 0 or index >= len_self:
+            raise IndexError("sequence index out of range")
+        return self.get_function(index)
+
+    def __iter__(self) -> Iterator[PageObject]:
+        for i in range(len(self)):
+            yield self[i]
diff --git a/PyPDF2/_utils.py b/PyPDF2/_utils.py
index 910bb6327..24ab68caa 100644
--- a/PyPDF2/_utils.py
+++ b/PyPDF2/_utils.py
@@ -1,331 +1,331 @@
-# Copyright (c) 2006, Mathieu Fenniak
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-# * The name of the author may not be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-"""
-Utility functions for PDF library.
-"""
-__author__ = "Mathieu Fenniak"
-__author_email__ = "biziqe@mathieu.fenniak.net"
-
-import os
-import warnings
-from codecs import getencoder
-from io import (
-    DEFAULT_BUFFER_SIZE,
-    BufferedReader,
-    BufferedWriter,
-    BytesIO,
-    FileIO,
-)
-from typing import Any, Dict, Optional, Tuple, Union, overload
-
-try:
-    # Python 3.10+: https://www.python.org/dev/peps/pep-0484/
-    from typing import TypeAlias  # type: ignore[attr-defined]
-except ImportError:
-    from typing_extensions import TypeAlias  # type: ignore[misc]
-
-from .errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError
-
-TransformationMatrixType: TypeAlias = Tuple[
-    Tuple[float, float, float], Tuple[float, float, float], Tuple[float, float, float]
-]
-CompressedTransformationMatrix: TypeAlias = Tuple[
-    float, float, float, float, float, float
-]
-
-bytes_type = type(bytes())  # Works the same in Python 2.X and 3.X
-StreamType = Union[BytesIO, BufferedReader, BufferedWriter, FileIO]
-StrByteType = Union[str, StreamType]
-
-DEPR_MSG_NO_REPLACEMENT = "{} is deprecated and will be removed in PyPDF2 {}."
-DEPR_MSG = "{} is deprecated and will be removed in PyPDF2 3.0.0. Use {} instead."
-
-
-def read_until_whitespace(stream: StreamType, maxchars: Optional[int] = None) -> bytes:
-    """
-    Reads non-whitespace characters and returns them.
-    Stops upon encountering whitespace or when maxchars is reached.
-    """
-    txt = b_("")
-    while True:
-        tok = stream.read(1)
-        if tok.isspace() or not tok:
-            break
-        txt += tok
-        if len(txt) == maxchars:
-            break
-    return txt
-
-
-def read_non_whitespace(stream: StreamType) -> bytes:
-    """
-    Finds and reads the next non-whitespace character (ignores whitespace).
-    """
-    tok = WHITESPACES[0]
-    while tok in WHITESPACES:
-        tok = stream.read(1)
-    return tok
-
-
-def skip_over_whitespace(stream: StreamType) -> bool:
-    """
-    Similar to readNonWhitespace, but returns a Boolean if more than
-    one whitespace character was read.
-    """
-    tok = WHITESPACES[0]
-    cnt = 0
-    while tok in WHITESPACES:
-        tok = stream.read(1)
-        cnt += 1
-    return cnt > 1
-
-
-def skip_over_comment(stream: StreamType) -> None:
-    tok = stream.read(1)
-    stream.seek(-1, 1)
-    if tok == b_("%"):
-        while tok not in (b_("\n"), b_("\r")):
-            tok = stream.read(1)
-
-
-def read_until_regex(stream: StreamType, regex: Any, ignore_eof: bool = False) -> bytes:
-    """
-    Reads until the regular expression pattern matched (ignore the match)
-    :raises PdfStreamError: on premature end-of-file
-    :param bool ignore_eof: If true, ignore end-of-line and return immediately
-    :param regex: re.Pattern
-    """
-    name = b_("")
-    while True:
-        tok = stream.read(16)
-        if not tok:
-            # stream has truncated prematurely
-            if ignore_eof:
-                return name
-            else:
-                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
-        m = regex.search(tok)
-        if m is not None:
-            name += tok[: m.start()]
-            stream.seek(m.start() - len(tok), 1)
-            break
-        name += tok
-    return name
-
-
-CRLF = b"\r\n"
-
-
-def read_block_backwards(stream: StreamType, to_read: int) -> bytes:
-    """Given a stream at position X, read a block of size
-    to_read ending at position X.
-    The stream's position should be unchanged.
-    """
-    if stream.tell() < to_read:
-        raise PdfStreamError("Could not read malformed PDF file")
-    # Seek to the start of the block we want to read.
-    stream.seek(-to_read, os.SEEK_CUR)
-    read = stream.read(to_read)
-    # Seek to the start of the block we read after reading it.
-    stream.seek(-to_read, os.SEEK_CUR)
-    if len(read) != to_read:
-        raise PdfStreamError(f"EOF: read {len(read)}, expected {to_read}?")
-    return read
-
-
-def read_previous_line(stream: StreamType) -> bytes:
-    """Given a byte stream with current position X, return the previous
-    line - all characters between the first CR/LF byte found before X
-    (or, the start of the file, if no such byte is found) and position X
-    After this call, the stream will be positioned one byte after the
-    first non-CRLF character found beyond the first CR/LF byte before X,
-    or, if no such byte is found, at the beginning of the stream.
-    """
-    line_content = []
-    found_crlf = False
-    if stream.tell() == 0:
-        raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
-    while True:
-        to_read = min(DEFAULT_BUFFER_SIZE, stream.tell())
-        if to_read == 0:
-            break
-        # Read the block. After this, our stream will be one
-        # beyond the initial position.
-        block = read_block_backwards(stream, to_read)
-        idx = len(block) - 1
-        if not found_crlf:
-            # We haven't found our first CR/LF yet.
-            # Read off characters until we hit one.
-            while idx >= 0 and block[idx] not in CRLF:
-                idx -= 1
-            if idx >= 0:
-                found_crlf = True
-        if found_crlf:
-            # We found our first CR/LF already (on this block or
-            # a previous one).
-            # Our combined line is the remainder of the block
-            # plus any previously read blocks.
-            line_content.append(block[idx + 1 :])
-            # Continue to read off any more CRLF characters.
-            while idx >= 0 and block[idx] in CRLF:
-                idx -= 1
-        else:
-            # Didn't find CR/LF yet - add this block to our
-            # previously read blocks and continue.
-            line_content.append(block)
-        if idx >= 0:
-            # We found the next non-CRLF character.
-            # Set the stream position correctly, then break
-            stream.seek(idx + 1, os.SEEK_CUR)
-            break
-    # Join all the blocks in the line (which are in reverse order)
-    return b"".join(line_content[::-1])
-
-
-def matrix_multiply(
-    a: TransformationMatrixType, b: TransformationMatrixType
-) -> TransformationMatrixType:
-    return tuple(  # type: ignore[return-value]
-        tuple(sum(float(i) * float(j) for i, j in zip(row, col)) for col in zip(*b))
-        for row in a
-    )
-
-
-def mark_location(stream: StreamType) -> None:
-    """Creates text file showing current location in context."""
-    # Mainly for debugging
-    radius = 5000
-    stream.seek(-radius, 1)
-    with open("PyPDF2_pdfLocation.txt", "wb") as output_fh:
-        output_fh.write(stream.read(radius))
-        output_fh.write(b"HERE")
-        output_fh.write(stream.read(radius))
-    stream.seek(-radius, 1)
-
-
-B_CACHE: Dict[Union[str, bytes], bytes] = {}
-
-
-def b_(s: Union[str, bytes]) -> bytes:
-    bc = B_CACHE
-    if s in bc:
-        return bc[s]
-    if isinstance(s, bytes):
-        return s
-    else:
-        try:
-            r = s.encode("latin-1")
-            if len(s) < 2:
-                bc[s] = r
-            return r
-        except Exception:
-            r = s.encode("utf-8")
-            if len(s) < 2:
-                bc[s] = r
-            return r
-
-
-@overload
-def str_(b: str) -> str:
-    ...
-
-
-@overload
-def str_(b: bytes) -> str:
-    ...
-
-
-def str_(b: Union[str, bytes]) -> str:
-    if isinstance(b, bytes):
-        return b.decode("latin-1")
-    else:
-        return b
-
-
-@overload
-def ord_(b: str) -> int:
-    ...
-
-
-@overload
-def ord_(b: bytes) -> bytes:
-    ...
-
-
-@overload
-def ord_(b: int) -> int:
-    ...
-
-
-def ord_(b: Union[int, str, bytes]) -> Union[int, bytes]:
-    if isinstance(b, str):
-        return ord(b)
-    else:
-        return b
-
-
-def hexencode(b: bytes) -> bytes:
-
-    coder = getencoder("hex_codec")
-    coded = coder(b)  # type: ignore
-    return coded[0]
-
-
-def hex_str(num: int) -> str:
-    return hex(num).replace("L", "")
-
-
-WHITESPACES = [b_(x) for x in [" ", "\n", "\r", "\t", "\x00"]]
-
-
-def paeth_predictor(left: int, up: int, up_left: int) -> int:
-    p = left + up - up_left
-    dist_left = abs(p - left)
-    dist_up = abs(p - up)
-    dist_up_left = abs(p - up_left)
-
-    if dist_left <= dist_up and dist_left <= dist_up_left:
-        return left
-    elif dist_up <= dist_up_left:
-        return up
-    else:
-        return up_left
-
-
-def deprecate(msg: str, stacklevel: int = 3) -> None:
-    warnings.warn(msg, PendingDeprecationWarning, stacklevel=stacklevel)
-
-
-def deprecate_with_replacement(
-    old_name: str, new_name: str, removed_in: str = "3.0.0"
-) -> None:
-    deprecate(DEPR_MSG.format(old_name, new_name, removed_in), 4)
-
-
-def deprecate_no_replacement(name: str, removed_in: str = "3.0.0") -> None:
-    deprecate(DEPR_MSG_NO_REPLACEMENT.format(name, removed_in), 4)
+# Copyright (c) 2006, Mathieu Fenniak
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Utility functions for PDF library.
+"""
+__author__ = "Mathieu Fenniak"
+__author_email__ = "biziqe@mathieu.fenniak.net"
+
+import os
+import warnings
+from codecs import getencoder
+from io import (
+    DEFAULT_BUFFER_SIZE,
+    BufferedReader,
+    BufferedWriter,
+    BytesIO,
+    FileIO,
+)
+from typing import Any, Dict, Optional, Tuple, Union, overload
+
+try:
+    # Python 3.10+: https://www.python.org/dev/peps/pep-0484/
+    from typing import TypeAlias  # type: ignore[attr-defined]
+except ImportError:
+    from typing_extensions import TypeAlias  # type: ignore[misc]
+
+from .errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError
+
+TransformationMatrixType: TypeAlias = Tuple[
+    Tuple[float, float, float], Tuple[float, float, float], Tuple[float, float, float]
+]
+CompressedTransformationMatrix: TypeAlias = Tuple[
+    float, float, float, float, float, float
+]
+
+bytes_type = bytes  # Works the same in Python 2.X and 3.X
+StreamType = Union[BytesIO, BufferedReader, BufferedWriter, FileIO]
+StrByteType = Union[str, StreamType]
+
+DEPR_MSG_NO_REPLACEMENT = "{} is deprecated and will be removed in PyPDF2 {}."
+DEPR_MSG = "{} is deprecated and will be removed in PyPDF2 3.0.0. Use {} instead."
+
+
+def read_until_whitespace(stream: StreamType, maxchars: Optional[int] = None) -> bytes:
+    """
+    Reads non-whitespace characters and returns them.
+    Stops upon encountering whitespace or when maxchars is reached.
+    """
+    txt = b_("")
+    while True:
+        tok = stream.read(1)
+        if tok.isspace() or not tok:
+            break
+        txt += tok
+        if len(txt) == maxchars:
+            break
+    return txt
+
+
+def read_non_whitespace(stream: StreamType) -> bytes:
+    """
+    Finds and reads the next non-whitespace character (ignores whitespace).
+    """
+    tok = WHITESPACES[0]
+    while tok in WHITESPACES:
+        tok = stream.read(1)
+    return tok
+
+
+def skip_over_whitespace(stream: StreamType) -> bool:
+    """
+    Similar to readNonWhitespace, but returns a Boolean if more than
+    one whitespace character was read.
+    """
+    tok = WHITESPACES[0]
+    cnt = 0
+    while tok in WHITESPACES:
+        tok = stream.read(1)
+        cnt += 1
+    return cnt > 1
+
+
+def skip_over_comment(stream: StreamType) -> None:
+    tok = stream.read(1)
+    stream.seek(-1, 1)
+    if tok == b_("%"):
+        while tok not in (b_("\n"), b_("\r")):
+            tok = stream.read(1)
+
+
+def read_until_regex(stream: StreamType, regex: Any, ignore_eof: bool = False) -> bytes:
+    """
+    Reads until the regular expression pattern matched (ignore the match)
+    :raises PdfStreamError: on premature end-of-file
+    :param bool ignore_eof: If true, ignore end-of-line and return immediately
+    :param regex: re.Pattern
+    """
+    name = b_("")
+    while True:
+        tok = stream.read(16)
+        if not tok:
+            # stream has truncated prematurely
+            if ignore_eof:
+                return name
+            else:
+                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
+        m = regex.search(tok)
+        if m is not None:
+            name += tok[: m.start()]
+            stream.seek(m.start() - len(tok), 1)
+            break
+        name += tok
+    return name
+
+
+CRLF = b"\r\n"
+
+
+def read_block_backwards(stream: StreamType, to_read: int) -> bytes:
+    """Given a stream at position X, read a block of size
+    to_read ending at position X.
+    The stream's position should be unchanged.
+    """
+    if stream.tell() < to_read:
+        raise PdfStreamError("Could not read malformed PDF file")
+    # Seek to the start of the block we want to read.
+    stream.seek(-to_read, os.SEEK_CUR)
+    read = stream.read(to_read)
+    # Seek to the start of the block we read after reading it.
+    stream.seek(-to_read, os.SEEK_CUR)
+    if len(read) != to_read:
+        raise PdfStreamError(f"EOF: read {len(read)}, expected {to_read}?")
+    return read
+
+
+def read_previous_line(stream: StreamType) -> bytes:
+    """Given a byte stream with current position X, return the previous
+    line - all characters between the first CR/LF byte found before X
+    (or, the start of the file, if no such byte is found) and position X
+    After this call, the stream will be positioned one byte after the
+    first non-CRLF character found beyond the first CR/LF byte before X,
+    or, if no such byte is found, at the beginning of the stream.
+    """
+    line_content = []
+    found_crlf = False
+    if stream.tell() == 0:
+        raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
+    while True:
+        to_read = min(DEFAULT_BUFFER_SIZE, stream.tell())
+        if to_read == 0:
+            break
+        # Read the block. After this, our stream will be one
+        # beyond the initial position.
+        block = read_block_backwards(stream, to_read)
+        idx = len(block) - 1
+        if not found_crlf:
+            # We haven't found our first CR/LF yet.
+            # Read off characters until we hit one.
+            while idx >= 0 and block[idx] not in CRLF:
+                idx -= 1
+            if idx >= 0:
+                found_crlf = True
+        if found_crlf:
+            # We found our first CR/LF already (on this block or
+            # a previous one).
+            # Our combined line is the remainder of the block
+            # plus any previously read blocks.
+            line_content.append(block[idx + 1 :])
+            # Continue to read off any more CRLF characters.
+            while idx >= 0 and block[idx] in CRLF:
+                idx -= 1
+        else:
+            # Didn't find CR/LF yet - add this block to our
+            # previously read blocks and continue.
+            line_content.append(block)
+        if idx >= 0:
+            # We found the next non-CRLF character.
+            # Set the stream position correctly, then break
+            stream.seek(idx + 1, os.SEEK_CUR)
+            break
+    # Join all the blocks in the line (which are in reverse order)
+    return b"".join(line_content[::-1])
+
+
+def matrix_multiply(
+    a: TransformationMatrixType, b: TransformationMatrixType
+) -> TransformationMatrixType:
+    return tuple(  # type: ignore[return-value]
+        tuple(sum(float(i) * float(j) for i, j in zip(row, col)) for col in zip(*b))
+        for row in a
+    )
+
+
+def mark_location(stream: StreamType) -> None:
+    """Creates text file showing current location in context."""
+    # Mainly for debugging
+    radius = 5000
+    stream.seek(-radius, 1)
+    with open("PyPDF2_pdfLocation.txt", "wb") as output_fh:
+        output_fh.write(stream.read(radius))
+        output_fh.write(b"HERE")
+        output_fh.write(stream.read(radius))
+    stream.seek(-radius, 1)
+
+
+B_CACHE: Dict[Union[str, bytes], bytes] = {}
+
+
+def b_(s: Union[str, bytes]) -> bytes:
+    bc = B_CACHE
+    if s in bc:
+        return bc[s]
+    if isinstance(s, bytes):
+        return s
+    else:
+        try:
+            r = s.encode("latin-1")
+            if len(s) < 2:
+                bc[s] = r
+            return r
+        except Exception:
+            r = s.encode("utf-8")
+            if len(s) < 2:
+                bc[s] = r
+            return r
+
+
+@overload
+def str_(b: str) -> str:
+    ...
+
+
+@overload
+def str_(b: bytes) -> str:
+    ...
+
+
+def str_(b: Union[str, bytes]) -> str:
+    if isinstance(b, bytes):
+        return b.decode("latin-1")
+    else:
+        return b
+
+
+@overload
+def ord_(b: str) -> int:
+    ...
+
+
+@overload
+def ord_(b: bytes) -> bytes:
+    ...
+
+
+@overload
+def ord_(b: int) -> int:
+    ...
+
+
+def ord_(b: Union[int, str, bytes]) -> Union[int, bytes]:
+    if isinstance(b, str):
+        return ord(b)
+    else:
+        return b
+
+
+def hexencode(b: bytes) -> bytes:
+
+    coder = getencoder("hex_codec")
+    coded = coder(b)  # type: ignore
+    return coded[0]
+
+
+def hex_str(num: int) -> str:
+    return hex(num).replace("L", "")
+
+
+WHITESPACES = [b_(x) for x in [" ", "\n", "\r", "\t", "\x00"]]
+
+
+def paeth_predictor(left: int, up: int, up_left: int) -> int:
+    p = left + up - up_left
+    dist_left = abs(p - left)
+    dist_up = abs(p - up)
+    dist_up_left = abs(p - up_left)
+
+    if dist_left <= dist_up and dist_left <= dist_up_left:
+        return left
+    elif dist_up <= dist_up_left:
+        return up
+    else:
+        return up_left
+
+
+def deprecate(msg: str, stacklevel: int = 3) -> None:
+    warnings.warn(msg, PendingDeprecationWarning, stacklevel=stacklevel)
+
+
+def deprecate_with_replacement(
+    old_name: str, new_name: str, removed_in: str = "3.0.0"
+) -> None:
+    deprecate(DEPR_MSG.format(old_name, new_name, removed_in), 4)
+
+
+def deprecate_no_replacement(name: str, removed_in: str = "3.0.0") -> None:
+    deprecate(DEPR_MSG_NO_REPLACEMENT.format(name, removed_in), 4)
diff --git a/tests/test_page.py b/tests/test_page.py
index d56ef724b..2aee95103 100644
--- a/tests/test_page.py
+++ b/tests/test_page.py
@@ -9,6 +9,7 @@
 from PyPDF2._page import PageObject
 from PyPDF2.constants import PageAttributes as PG
 from PyPDF2.generic import DictionaryObject, NameObject, RectangleObject
+
 from . import get_pdf_from_url
 
 TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 53b4a6ca5..09797b15f 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -17,6 +17,7 @@
     PdfReadWarning,
 )
 from PyPDF2.filters import _xobj_to_image
+
 from . import get_pdf_from_url
 
 TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index c7dc4034a..b928b858a 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -7,6 +7,7 @@
 
 from PyPDF2 import PdfReader
 from PyPDF2.constants import PageAttributes as PG
+
 from . import get_pdf_from_url
 
 TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
@@ -144,7 +145,11 @@ def test_rotate_45():
             "https://github.com/py-pdf/PyPDF2/files/3796761/17343_2008_Order_09-Jan-2019.pdf",
             [0, 1],
         ),
-        (True, "https://github.com/py-pdf/PyPDF2/files/8884471/ssi_manwaring.pdf", [0, 1]),
+        (
+            True,
+            "https://github.com/py-pdf/PyPDF2/files/8884471/ssi_manwaring.pdf",
+            [0, 1],
+        ),
         (True, "https://github.com/py-pdf/PyPDF2/files/8884469/999092.pdf", [0, 1]),
         (
             True,
@@ -156,11 +161,7 @@ def test_rotate_45():
             "https://github.com/py-pdf/PyPDF2/files/8884470/fdocuments.in_sweet-fundamentals-of-crystallography.pdf",
             [0, 1, 34, 35, 36, 118, 119, 120, 121],
         ),
-        (
-            True,
-            "https://github.com/py-pdf/PyPDF2/files/8884493/998167.pdf",
-            [0]
-        ),
+        (True, "https://github.com/py-pdf/PyPDF2/files/8884493/998167.pdf", [0]),
     ],
 )
 def test_extract_textbench(enable, url, pages, print_result=False):