From 699e1ad0fbdcf28396163f8ae47d0158c6f6eb27 Mon Sep 17 00:00:00 2001
From: dsk7 <jensg@posteo.de>
Date: Sat, 23 Apr 2022 17:39:18 +0200
Subject: [PATCH] BUG: fix pdf reader getting stuck when trying to read large
 files without xref marker

---
 PyPDF2/pdf.py        |  8 ++++----
 Tests/test_reader.py | 22 ++++++++++++++++++++++
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py
index 035eeb223..a6e361edb 100644
--- a/PyPDF2/pdf.py
+++ b/PyPDF2/pdf.py
@@ -2082,7 +2082,7 @@ def _pairs(self, array):
     def readNextEndLine(self, stream, limit_offset=0):
         debug = False
         if debug: print(">>readNextEndLine")
-        line = b_("")
+        line_parts = []
         while True:
             # Prevent infinite loops in malformed PDFs
             if stream.tell() == 0 or stream.tell() == limit_offset:
@@ -2109,10 +2109,10 @@ def readNextEndLine(self, stream, limit_offset=0):
                 break
             else:
                 if debug: print("  x is neither")
-                line = x + line
-                if debug: print(("  RNEL line:", line))
+                line_parts.append(x)
         if debug: print("leaving RNEL")
-        return line
+        line_parts.reverse()
+        return b"".join(line_parts)
 
     def decrypt(self, password):
         """
diff --git a/Tests/test_reader.py b/Tests/test_reader.py
index d38b32ab9..5230ee02e 100644
--- a/Tests/test_reader.py
+++ b/Tests/test_reader.py
@@ -1,5 +1,6 @@
 import io
 import os
+import time
 
 import pytest
 
@@ -9,6 +10,14 @@
 from PyPDF2.constants import Ressources as RES
 from PyPDF2.errors import PdfReadError
 from PyPDF2.filters import _xobj_to_image
+from sys import version_info
+
+if version_info < ( 3, 0 ):
+    from cStringIO import StringIO
+    StreamIO = StringIO
+else:
+    from io import BytesIO
+    StreamIO = BytesIO
 
 TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
 PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
@@ -462,3 +471,16 @@ def test_get_destination_age_number():
     for outline in outlines:
         if not isinstance(outline, list):
             reader.getDestinationPageNumber(outline)
+
+
+def test_do_not_get_stuck_on_large_files_without_start_xref():
+    """Tests for the absence of a DoS bug, where a large file without a startxref marker
+    would cause the library to hang for minutes to hours."""
+    start_time = time.time()
+    broken_stream = StreamIO(b"\0" * 5 * 1000 * 1000)
+    with pytest.raises(PdfReadError):
+        PdfFileReader(broken_stream)
+    parse_duration = time.time() - start_time
+    # Parsing is expected to take less than a second on a modern CPU; the generous
+    # 60-second bound below only leaves tolerance for busy or slow systems.
+    assert parse_duration < 60