From a41c4979bc5f18224711352390bbe1a68ad1392c Mon Sep 17 00:00:00 2001
From: Michael Karlen <michael.karlen@gmail.com>
Date: Fri, 16 Sep 2022 13:31:54 +0200
Subject: [PATCH] Fix performance issues with large embedded base64 images

Certain PDF libraries embed images as base64 strings. This causes performance issues
in `read_string_from_stream` because the result is built by incremental concatenation,
byte by byte. Collect the bytes in a list and join them once at the end instead.

The PDF library in our case is:
```
<xmp:CreatorTool>Canon iR-ADV C256  PDF</xmp:CreatorTool>
<pdf:Producer>PDF Annotator 8.0.0.826 [Adobe PSL 1.3e for Canon</pdf:Producer>

```
---
 PyPDF2/generic/_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/PyPDF2/generic/_utils.py b/PyPDF2/generic/_utils.py
index c5b6129f0..b7a0ee416 100644
--- a/PyPDF2/generic/_utils.py
+++ b/PyPDF2/generic/_utils.py
@@ -41,7 +41,7 @@ def read_string_from_stream(
 ) -> Union["TextStringObject", "ByteStringObject"]:
     tok = stream.read(1)
     parens = 1
-    txt = b""
+    txt = []
     while True:
         tok = stream.read(1)
         if not tok:
@@ -106,8 +106,8 @@ def read_string_from_stream(
                 else:
                     msg = rf"Unexpected escaped string: {tok.decode('utf8')}"
                     logger_warning(msg, __name__)
-        txt += tok
-    return create_string_object(txt, forced_encoding)
+        txt.append(tok)
+    return create_string_object(b''.join(txt), forced_encoding)
 
 
 def create_string_object(
@@ -164,7 +164,7 @@ def decode_pdfdocencoding(byte_array: bytes) -> str:
             raise UnicodeDecodeError(
                 "pdfdocencoding",
                 bytearray(b),
-                -1,
+                    -1,
                 -1,
                 "does not exist in translation table",
             )