py-pdf · dsk7 · Oct 22, 2016
diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py
@@ -1758,11 +1758,11 @@ def read(self, stream):
         while line[:5] != b_("%%EOF"):
             if stream.tell() < last1K:
                 raise utils.PdfReadError("EOF marker not found")
-            line = self.readNextEndLine(stream)
+            line = self.readNextEndLine(stream, maxLineLength=1024)
             if debug: print("  line:",line)
 
         # find startxref entry - the location of the xref table
-        line = self.readNextEndLine(stream)
+        line = self.readNextEndLine(stream, maxLineLength=1024)
         try:
             startxref = int(line)
         except ValueError:
@@ -1772,7 +1772,7 @@ def read(self, stream):
             startxref = int(line[9:].strip())
             warnings.warn("startxref on same line as offset")
         else:
-            line = self.readNextEndLine(stream)
+            line = self.readNextEndLine(stream, maxLineLength=1024)
             if line[:9] != b_("startxref"):
                 raise utils.PdfReadError("startxref not found")
 
@@ -1991,7 +1991,10 @@ def _pairs(self, array):
             if (i+1) >= len(array):
                 break
 
-    def readNextEndLine(self, stream):
+    def readNextEndLine(self, stream, maxLineLength=None):
+        '''
+        If maxLineLength is set, a PdfReadError is raised as soon as the line candidate grows longer than this value.
+        '''
         debug = False
         if debug: print(">>readNextEndLine")
         line = b_("")
@@ -2023,6 +2026,8 @@ def readNextEndLine(self, stream):
                 if debug: print("  x is neither")
                 line = x + line
                 if debug: print(("  RNEL line:", line))
+                if maxLineLength is not None and len(line) > maxLineLength:
+                    raise utils.PdfReadError("EOL marker not found")
         if debug: print("leaving RNEL")
         return line
 

diff --git a/Tests/tests.py b/Tests/tests.py
@@ -1,8 +1,9 @@
 import os
 import sys
 import unittest
+import StringIO
 
-from PyPDF2 import PdfFileReader, PdfFileWriter
+from PyPDF2 import PdfFileReader, PdfFileWriter, utils
 
 
 # Configure path environment
@@ -37,6 +38,15 @@ def test_PdfReaderFileLoad(self):
                     % (pdftext, ipdf_p1_text.encode('utf-8', errors='ignore')))
 
 
+    def test_PdfReaderDoesNotGetStuckOnLargeFilesWithoutStartxref(self):
+        '''Tests the absence of a "DoS"-kind of bug, where a large file without a startxref
+        entry would cause the library to hang'''
+        broken_stream = StringIO.StringIO(chr(0) * 10 * 1000 * 1000)
+
+        with self.assertRaises(utils.PdfReadError):
+            PdfFileReader(broken_stream)
+
+
 class AddJsTestCase(unittest.TestCase):
 
     def setUp(self):