From 46f735a0490c1880ec9d16285393bc0e2bc7d325 Mon Sep 17 00:00:00 2001 From: speedplane Date: Tue, 28 Feb 2017 00:25:00 -0500 Subject: [PATCH 1/7] Fix an issue with parsing inline images. We must look for an EI with whitespace before and after it, not just after it. --- PyPDF2/pdf.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py index 9979414ff..91bc28671 100644 --- a/PyPDF2/pdf.py +++ b/PyPDF2/pdf.py @@ -2727,11 +2727,21 @@ def _readInlineImage(self, stream): while True: # Read the inline image, while checking for EI (End Image) operator. tok = stream.read(1) + if not tok: + if self.strict: + raise utils.PdfReadError("No end to inline image.") + # Even though we're not raising, this is almost certainly bad. + break if tok == b_("E"): # Check for End Image tok2 = stream.read(1) if tok2 == b_("I"): - # Data can contain EI, so check for the Q operator. + # Data can contain EI, so check for whitespace before + if not data or data[-1] not in utils.WHITESPACES: + # No whitespace, add the E and I and keep moving. + data += tok + tok2 + continue + # Now try to find the Q operator. tok3 = stream.read(1) info = tok + tok2 # We need to find whitespace between EI and Q. From b847fb9f4b1ffc61218ca3a7c54b92af69231e32 Mon Sep 17 00:00:00 2001 From: speedplane Date: Wed, 7 Nov 2018 22:05:53 -0500 Subject: [PATCH 2/7] Fix an off by one error here, this value can represent the full 32 bit range. --- PyPDF2/filters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index 3717fd4c5..3bdca32e0 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -296,7 +296,8 @@ def decode(data, decodeParms=None): group[2] * (85**2) + \ group[3] * 85 + \ group[4] - assert b < (2**32 - 1) + # Can represent values between 0 and 2^32 - 1 + assert b <= (2**32 - 1) c4 = chr((b >> 0) % 256) c3 = chr((b >> 8) % 256) c2 = chr((b >> 16) % 256) From 8b35ab55f8b7d9640037f3627505217a4bd8f4da Mon Sep 17 00:00:00 2001 From: speedplane Date: Wed, 7 Nov 2018 22:06:07 -0500 Subject: [PATCH 3/7] Add a bit more logging on this type of error. --- PyPDF2/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index c4332297d..7ee46572b 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -629,7 +629,9 @@ def readFromStream(stream, pdf): else: if debug: print(("E", e, ndstream, debugging.toHex(end))) stream.seek(pos, 0) - raise utils.PdfReadError("Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell())) + raise utils.PdfReadError("Unable to find 'endstream' " + "marker after stream at byte %s. (nd='%s', end='%s')"%( + utils.hexStr(stream.tell()), ndstream, end)) else: stream.seek(pos, 0) if "__streamdata__" in data: From ba1799dd3c35fccd117b1934705794fb25bc9930 Mon Sep 17 00:00:00 2001 From: speedplane Date: Tue, 28 Feb 2017 00:25:00 -0500 Subject: [PATCH 4/7] Fix an issue with parsing inline images. We must look for an EI with whitespace before and after it, not just after it. --- PyPDF2/pdf.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py index c5cae9e67..5ea7dbe7a 100644 --- a/PyPDF2/pdf.py +++ b/PyPDF2/pdf.py @@ -2794,11 +2794,21 @@ def _readInlineImage(self, stream): while True: # Read the inline image, while checking for EI (End Image) operator. tok = stream.read(1) + if not tok: + if self.strict: + raise utils.PdfReadError("No end to inline image.") + # Even though we're not raising, this is almost certainly bad. + break if tok == b_("E"): # Check for End Image tok2 = stream.read(1) if tok2 == b_("I"): - # Data can contain EI, so check for the Q operator. + # Data can contain EI, so check for whitespace before + if not data or data[-1] not in utils.WHITESPACES: + # No whitespace, add the E and I and keep moving. + data += tok + tok2 + continue + # Now try to find the Q operator. tok3 = stream.read(1) info = tok + tok2 # We need to find whitespace between EI and Q. From cae961d833c9190fb16c283197a509e02815c529 Mon Sep 17 00:00:00 2001 From: speedplane Date: Wed, 7 Nov 2018 22:05:53 -0500 Subject: [PATCH 5/7] Fix an off by one error here, this value can represent the full 32 bit range. --- PyPDF2/filters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index 57446f4b0..f4db6ac6e 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -310,7 +310,8 @@ def decode(data, decodeParms=None): group[2] * (85**2) + \ group[3] * 85 + \ group[4] - assert b < (2**32 - 1) + # Can represent values between 0 and 2^32 - 1 + assert b <= (2**32 - 1) c4 = chr((b >> 0) % 256) c3 = chr((b >> 8) % 256) c2 = chr((b >> 16) % 256) From d6ed4c7a7c50c3a3c17bd77f659b9967ed1a2660 Mon Sep 17 00:00:00 2001 From: speedplane Date: Wed, 7 Nov 2018 22:06:07 -0500 Subject: [PATCH 6/7] Add a bit more logging on this type of error. --- PyPDF2/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index 959957dde..9f3310058 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -631,7 +631,9 @@ def readFromStream(stream, pdf): else: if debug: print(("E", e, ndstream, debugging.toHex(end))) stream.seek(pos, 0) - raise utils.PdfReadError("Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell())) + raise utils.PdfReadError("Unable to find 'endstream' " + "marker after stream at byte %s. (nd='%s', end='%s')"%( + utils.hexStr(stream.tell()), ndstream, end)) else: stream.seek(pos, 0) if "__streamdata__" in data: From a53f72e76efd64f69812a7af2d86e125d3948da6 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sat, 16 Apr 2022 15:39:47 +0200 Subject: [PATCH 7/7] Update PyPDF2/filters.py --- PyPDF2/filters.py | 1 - 1 file changed, 1 deletion(-) diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index c6b622c7b..bb56234a7 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -321,7 +321,6 @@ def decode(data, decodeParms=None): group[2] * (85**2) + \ group[3] * 85 + \ group[4] - # Can represent values between 0 and 2^32 - 1 assert b <= (2**32 - 1) c4 = chr((b >> 0) % 256) c3 = chr((b >> 8) % 256)