Skip to content

Commit

Permalink
TST: Add Test for ASCII85Decode (#825)
Browse files Browse the repository at this point in the history
Full credit to
6dc90b1

Co-authored-by: Acsor <[email protected]>
  • Loading branch information
MartinThoma and acsor committed Apr 28, 2022
1 parent 750950a commit 5e5aef4
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 22 deletions.
47 changes: 26 additions & 21 deletions PyPDF2/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,30 +325,30 @@ def decode(data, decodeParms=None):
if version_info < ( 3, 0 ):
retval = ""
group = []
x = 0
hitEod = False
index = 0
hit_eod = False
# remove all whitespace from data
data = [y for y in data if y not in ' \n\r\t']
while not hitEod:
c = data[x]
if len(retval) == 0 and c == "<" and data[x+1] == "~":
x += 2
while not hit_eod:
c = data[index]
if len(retval) == 0 and c == "<" and data[index+1] == "~":
index += 2
continue
# elif c.isspace():
# x += 1
# index += 1
# continue
elif c == 'z':
assert len(group) == 0
retval += '\x00\x00\x00\x00'
x += 1
index += 1
continue
elif c == "~" and data[x+1] == ">":
elif c == "~" and data[index+1] == ">":
if len(group) != 0:
# cannot have a final group of just 1 char
assert len(group) > 1
cnt = len(group) - 1
group += [ 85, 85, 85 ]
hitEod = cnt
hit_eod = cnt
else:
break
else:
Expand All @@ -361,37 +361,42 @@ def decode(data, decodeParms=None):
group[2] * (85**2) + \
group[3] * 85 + \
group[4]
if b > (2**32 - 1):
raise OverflowError(
"The sum of a ASCII85-encoded 4-byte group shall "
"not exceed 2 ^ 32 - 1. See ISO 32000, 2008, 7.4.3"
)
assert b <= (2**32 - 1)
c4 = chr((b >> 0) % 256)
c3 = chr((b >> 8) % 256)
c2 = chr((b >> 16) % 256)
c1 = chr(b >> 24)
retval += (c1 + c2 + c3 + c4)
if hitEod:
retval = retval[:-4+hitEod]
if hit_eod:
retval = retval[:-4+hit_eod]
group = []
x += 1
index += 1
return retval
else:
if isinstance(data, str):
data = data.encode('ascii')
n = b = 0
group_index = b = 0
out = bytearray()
for c in data:
if ord('!') <= c and c <= ord('u'):
n += 1
group_index += 1
b = b*85+(c-33)
if n == 5:
if group_index == 5:
out += struct.pack(b'>L',b)
n = b = 0
group_index = b = 0
elif c == ord('z'):
assert n == 0
assert group_index == 0
out += b'\0\0\0\0'
elif c == ord('~'):
if n:
for _ in range(5-n):
if group_index:
for _ in range(5-group_index):
b = b*85+84
out += struct.pack(b'>L',b)[:n-1]
out += struct.pack(b'>L',b)[:group_index-1]
break
return bytes(out)

Expand Down
43 changes: 42 additions & 1 deletion Tests/test_filters.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# -*- coding: utf-8 -*-
import string
from itertools import product as cartesian_product

import pytest

from PyPDF2.errors import PdfReadError, PdfStreamError
from PyPDF2.filters import ASCIIHexDecode, FlateDecode
from PyPDF2.filters import ASCIIHexDecode, FlateDecode, ASCII85Decode

filter_inputs = (
# "", '', """""",
Expand Down Expand Up @@ -97,3 +98,43 @@ def test_ASCIIHexDecode_no_eod():
with pytest.raises(PdfStreamError) as exc:
ASCIIHexDecode.decode("")
assert exc.value.args[0] == "Unexpected EOD in ASCIIHexDecode"


@pytest.mark.xfail
def test_ASCII85Decode_with_overflow():
    """
    Decode single characters lying outside the ``!``..``u`` range (and
    different from ``z`` and from whitespace), each followed by the ``~>``
    end-of-data marker, and expect a ``ValueError`` with an empty message.

    The test is marked ``xfail``, i.e. it is expected to fail for now.
    """
    out_of_range_chars = (
        "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0e\x0f"
        "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a"
        "\x1b\x1c\x1d\x1e\x1fvwxy{|}~\x7f\x80\x81\x82"
        "\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d"
        "\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98"
        "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0¡¢£¤¥¦§¨©ª«¬"
        "\xad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇ"
    )

    for char in out_of_range_chars:
        # Each probe is one offending character plus the EOD marker.
        encoded = char + "~>"
        with pytest.raises(ValueError) as exc:
            ASCII85Decode.decode(encoded)
        assert exc.value.args[0] == ""


@pytest.mark.no_py27
def test_ASCII85Decode_five_zero_bytes():
    """
    Check the ``z`` shorthand of ASCII85.

    From ISO 32000 (2008) §7.4.3:
    «As a special case, if all five bytes are 0, they shall be represented
    by the character with code 122 (z) instead of by five exclamation
    points (!!!!!).»
    """
    zero_group = b"\x00\x00\x00\x00"

    # The shorthand and the spelled-out form must decode to the same bytes.
    assert ASCII85Decode.decode("!!!!!") == ASCII85Decode.decode("z")

    # Every additional "z" contributes one more group of four zero bytes.
    for repeat in (1, 2, 3):
        assert ASCII85Decode.decode("z" * repeat) == zero_group * repeat

0 comments on commit 5e5aef4

Please sign in to comment.