diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index 2a08dfb599..6ae08ab854 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -325,30 +325,30 @@ def decode(data, decodeParms=None): if version_info < ( 3, 0 ): retval = "" group = [] - x = 0 - hitEod = False + index = 0 + hit_eod = False # remove all whitespace from data data = [y for y in data if y not in ' \n\r\t'] - while not hitEod: - c = data[x] - if len(retval) == 0 and c == "<" and data[x+1] == "~": - x += 2 + while not hit_eod: + c = data[index] + if len(retval) == 0 and c == "<" and data[index+1] == "~": + index += 2 continue # elif c.isspace(): - # x += 1 + # index += 1 # continue elif c == 'z': assert len(group) == 0 retval += '\x00\x00\x00\x00' - x += 1 + index += 1 continue - elif c == "~" and data[x+1] == ">": + elif c == "~" and data[index+1] == ">": if len(group) != 0: # cannot have a final group of just 1 char assert len(group) > 1 cnt = len(group) - 1 group += [ 85, 85, 85 ] - hitEod = cnt + hit_eod = cnt else: break else: @@ -361,37 +361,42 @@ def decode(data, decodeParms=None): group[2] * (85**2) + \ group[3] * 85 + \ group[4] + if b > (2**32 - 1): + raise OverflowError( + "The sum of a ASCII85-encoded 4-byte group shall " + "not exceed 2 ^ 32 - 1. See ISO 32000, 2008, 7.4.3" + ) assert b <= (2**32 - 1) c4 = chr((b >> 0) % 256) c3 = chr((b >> 8) % 256) c2 = chr((b >> 16) % 256) c1 = chr(b >> 24) retval += (c1 + c2 + c3 + c4) - if hitEod: - retval = retval[:-4+hitEod] + if hit_eod: + retval = retval[:-4+hit_eod] group = [] - x += 1 + index += 1 return retval else: if isinstance(data, str): data = data.encode('ascii') - n = b = 0 + group_index = b = 0 out = bytearray() for c in data: if ord('!') <= c and c <= ord('u'): - n += 1 + group_index += 1 b = b*85+(c-33) - if n == 5: + if group_index == 5: out += struct.pack(b'>L',b) - n = b = 0 + group_index = b = 0 elif c == ord('z'): - assert n == 0 + assert group_index == 0 out += b'\0\0\0\0' elif c == ord('~'): - if n: - for _ in range(5-n): + if group_index: + for _ in range(5-group_index): b = b*85+84 - out += struct.pack(b'>L',b)[:n-1] + out += struct.pack(b'>L',b)[:group_index-1] break return bytes(out) diff --git a/Tests/test_filters.py b/Tests/test_filters.py index 4c1ff8e780..c9d16fd38a 100644 --- a/Tests/test_filters.py +++ b/Tests/test_filters.py @@ -1,10 +1,11 @@ +# -*- coding: utf-8 -*- import string from itertools import product as cartesian_product import pytest from PyPDF2.errors import PdfReadError, PdfStreamError -from PyPDF2.filters import ASCIIHexDecode, FlateDecode +from PyPDF2.filters import ASCIIHexDecode, FlateDecode, ASCII85Decode filter_inputs = ( # "", '', """""", @@ -97,3 +98,43 @@ def test_ASCIIHexDecode_no_eod(): with pytest.raises(PdfStreamError) as exc: ASCIIHexDecode.decode("") assert exc.value.args[0] == "Unexpected EOD in ASCIIHexDecode" + + +@pytest.mark.xfail +def test_ASCII85Decode_with_overflow(): + inputs = ( + v + "~>" + for v in "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a" + "\x1b\x1c\x1d\x1e\x1fvwxy{|}~\x7f\x80\x81\x82" + "\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d" + "\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98" + "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0¡¢£¤¥¦§¨©ª«¬" + "\xad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇ" + ) + + for i in inputs: + with pytest.raises(ValueError) as exc: + ASCII85Decode.decode(i) + assert exc.value.args[0] == "" + + +@pytest.mark.no_py27 +def test_ASCII85Decode_five_zero_bytes(): + """ + From ISO 32000 (2008) §7.4.3: + «As a special case, if all five bytes are 0, they shall be represented + by the character with code 122 (z) instead of by five exclamation + points (!!!!!).» + """ + inputs = ("z", "zz", "zzz") + exp_outputs = ( + b"\x00\x00\x00\x00", + b"\x00\x00\x00\x00" * 2, + b"\x00\x00\x00\x00" * 3, + ) + + assert ASCII85Decode.decode("!!!!!") == ASCII85Decode.decode("z") + + for expected, i in zip(exp_outputs, inputs): + assert ASCII85Decode.decode(i) == expected