Skip to content

Commit

Permalink
Remove duplication
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma committed Apr 27, 2022
1 parent 46fd5dd commit 3c078ce
Showing 1 changed file with 14 additions and 13 deletions.
27 changes: 14 additions & 13 deletions PyPDF2/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1838,9 +1838,13 @@ def read(self, stream):
raise PdfReadError("startxref not found")

# check the startxref pointer and, if it is broken, correct it (non-strict mode only)
rebuildXrefTable = self.is_xref_broken(stream, startxref)
if self.strict and rebuildXrefTable:
xref_issue_nr = self._get_xref_issues(stream, startxref)
if self.strict and xref_issue_nr:
raise PdfReadError("Broken xref table")
else:
warnings.warn(
"incorrect startxref pointer({})".format(xref_issue_nr), PdfReadWarning
)

# read all cross reference tables and their trailers
self.xref = {}
Expand Down Expand Up @@ -1927,7 +1931,7 @@ def read(self, stream):
startxref = newTrailer["/Prev"]
else:
break
elif rebuildXrefTable:
elif xref_issue_nr:
self._rebuild_xref_table(stream)
break
elif x.isdigit():
Expand Down Expand Up @@ -2027,32 +2031,29 @@ def used_before(num, generation):
stream.seek(loc, 0) # return to where it was

@staticmethod
def is_xref_broken(stream, startxref):
def _get_xref_issues(stream, startxref):
"""Returns an int which indicates an issue. 0 means there is no issue."""
stream.seek(startxref - 1, 0) # -1 to check character before
line = stream.read(1)
if line not in b_("\r\n \t"):
warnings.warn("incorrect startxref pointer(1)", PdfReadWarning)
return True
return 1
line = stream.read(4)
if line != b_("xref"):
# not an xref so check if it is an XREF object
line = b_("")
while line in b_("0123456789 \t"):
line = stream.read(1)
if line == b_(""):
warnings.warn("incorrect startxref pointer(2)", PdfReadWarning)
return True
return 2
line += stream.read(2) # 1 char already read, +2 to check "obj"
if line.lower() != b_("obj"):
warnings.warn("incorrect startxref pointer(3)", PdfReadWarning)
return True
return 3
while stream.read(1) in b_(" \t\r\n"):
pass
line = stream.read(256) # check that it is xref obj
if b_("/xref") not in line.lower():
warnings.warn("incorrect startxref pointer(4)", PdfReadWarning)
return True
return False
return 4
return 0

def _rebuild_xref_table(self, stream):
self.xref = {}
Expand Down

0 comments on commit 3c078ce

Please sign in to comment.