Skip to content

Commit

Permalink
MAINT: Mark read_next_end_line as deprecated (#965)
Browse files Browse the repository at this point in the history
read_next_end_line was removed in #646, but it must be kept (marked as deprecated) to preserve backwards compatibility.
  • Loading branch information
MartinThoma authored Jun 9, 2022
1 parent 8cd0cfe commit 336d659
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 39 deletions.
42 changes: 40 additions & 2 deletions PyPDF2/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import os
import re
import struct
import warnings
from hashlib import md5
import os
from io import BytesIO
from typing import (
Any,
Expand All @@ -55,8 +55,8 @@
deprecate_with_replacement,
ord_,
read_non_whitespace,
read_until_whitespace,
read_previous_line,
read_until_whitespace,
skip_over_comment,
skip_over_whitespace,
)
Expand Down Expand Up @@ -1554,6 +1554,44 @@ def _pairs(self, array: List[int]) -> Iterable[Tuple[int, int]]:
if (i + 1) >= len(array):
break

def read_next_end_line(self, stream: StreamType, limit_offset: int = 0) -> bytes:
    """
    Collect bytes one at a time, stepping backwards, until an EOL byte is hit.

    .. deprecated:: 2.1.0

    :param stream: Seekable stream, positioned where the scan should start.
    :param limit_offset: Stream position at which the scan gives up.
    :return: The collected bytes, restored to forward order.
    :raises PdfReadError: If the scan reaches position 0 or ``limit_offset``,
        or if the position is below 2 right after a read.
    """
    deprecate_no_replacement("read_next_end_line", removed_in="4.0.0")
    eol_bytes = (b_("\n"), b_("\r"))  # \n = LF; \r = CR
    collected = []
    while True:
        # Prevent infinite loops in malformed PDFs
        if stream.tell() in (0, limit_offset):
            raise PdfReadError("Could not read malformed PDF file")
        byte = stream.read(1)
        if stream.tell() < 2:
            raise PdfReadError("EOL marker not found")
        # Reading advanced by one byte; seeking back two nets one step backwards.
        stream.seek(-2, 1)
        if byte not in eol_bytes:
            collected.append(byte)
            continue
        # An EOL byte was hit: keep stepping backwards over the run of EOL bytes.
        saw_eol_pair = False
        while byte in eol_bytes:
            byte = stream.read(1)
            if byte in eol_bytes:  # account for CR+LF
                stream.seek(-1, 1)
                saw_eol_pair = True
            if stream.tell() < 2:
                raise PdfReadError("EOL marker not found")
            stream.seek(-2, 1)
        # Move forward relative to the current position: 2 bytes if a second
        # EOL byte was seen, otherwise 1.
        stream.seek(2 if saw_eol_pair else 1, 1)
        break
    # Bytes were gathered last-to-first, so flip them before joining.
    return b"".join(reversed(collected))

def readNextEndLine(
    self, stream: StreamType, limit_offset: int = 0
) -> bytes:  # pragma: no cover
    """
    Legacy camelCase entry point that forwards to ``read_next_end_line``.

    .. deprecated:: 1.28.0

    :param stream: Passed through unchanged to ``read_next_end_line``.
    :param limit_offset: Passed through unchanged to ``read_next_end_line``.
    :return: Whatever ``read_next_end_line`` returns.
    """
    deprecate_no_replacement("readNextEndLine")
    line = self.read_next_end_line(stream, limit_offset)
    return line

def decrypt(self, password: Union[str, bytes]) -> int:
"""
When using an encrypted / secured PDF file with the PDF Standard
Expand Down
32 changes: 20 additions & 12 deletions PyPDF2/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,16 @@
__author__ = "Mathieu Fenniak"
__author_email__ = "[email protected]"

import os
import warnings
from codecs import getencoder
from io import BufferedReader, BufferedWriter, BytesIO, FileIO, DEFAULT_BUFFER_SIZE
import os
from io import (
DEFAULT_BUFFER_SIZE,
BufferedReader,
BufferedWriter,
BytesIO,
FileIO,
)
from typing import Any, Dict, Optional, Tuple, Union, overload

try:
Expand All @@ -56,7 +62,7 @@
StreamType = Union[BytesIO, BufferedReader, BufferedWriter, FileIO]
StrByteType = Union[str, StreamType]

DEPR_MSG_NO_REPLACEMENT = "{} is deprecated and will be removed in PyPDF2 3.0.0."
DEPR_MSG_NO_REPLACEMENT = "{} is deprecated and will be removed in PyPDF2 {}."
DEPR_MSG = "{} is deprecated and will be removed in PyPDF2 3.0.0. Use {} instead."


Expand Down Expand Up @@ -132,7 +138,7 @@ def read_until_regex(stream: StreamType, regex: Any, ignore_eof: bool = False) -
return name


CRLF = b'\r\n'
CRLF = b"\r\n"


def read_block_backwards(stream: StreamType, to_read: int) -> bytes:
Expand All @@ -141,14 +147,14 @@ def read_block_backwards(stream: StreamType, to_read: int) -> bytes:
The stream's position should be unchanged.
"""
if stream.tell() < to_read:
raise PdfStreamError('Could not read malformed PDF file')
raise PdfStreamError("Could not read malformed PDF file")
# Seek to the start of the block we want to read.
stream.seek(-to_read, os.SEEK_CUR)
read = stream.read(to_read)
# Seek to the start of the block we read after reading it.
stream.seek(-to_read, os.SEEK_CUR)
if len(read) != to_read:
raise PdfStreamError('EOF: read %s, expected %s?' % (len(read), to_read))
raise PdfStreamError(f"EOF: read {len(read)}, expected {to_read}?")
return read


Expand Down Expand Up @@ -184,7 +190,7 @@ def read_previous_line(stream: StreamType) -> bytes:
# a previous one).
# Our combined line is the remainder of the block
# plus any previously read blocks.
line_content.append(block[idx + 1:])
line_content.append(block[idx + 1 :])
# Continue to read off any more CRLF characters.
while idx >= 0 and block[idx] in CRLF:
idx -= 1
Expand All @@ -198,7 +204,7 @@ def read_previous_line(stream: StreamType) -> bytes:
stream.seek(idx + 1, os.SEEK_CUR)
break
# Join all the blocks in the line (which are in reverse order)
return b''.join(line_content[::-1])
return b"".join(line_content[::-1])


def matrix_multiply(
Expand Down Expand Up @@ -315,9 +321,11 @@ def deprecate(msg: str, stacklevel: int = 3) -> None:
warnings.warn(msg, PendingDeprecationWarning, stacklevel=stacklevel)


def deprecate_with_replacement(old_name: str, new_name: str) -> None:
deprecate(DEPR_MSG.format(old_name, new_name), 4)
def deprecate_with_replacement(
    old_name: str, new_name: str, removed_in: str = "3.0.0"
) -> None:
    """
    Emit a deprecation warning for ``old_name`` that points to ``new_name``.

    :param old_name: Name of the deprecated API.
    :param new_name: Name callers should use instead.
    :param removed_in: PyPDF2 version in which ``old_name`` will be removed.
    """
    # DEPR_MSG has only two placeholders and hard-codes "3.0.0", so formatting
    # it with three arguments silently discarded ``removed_in``. Build the
    # message here so the requested version is actually reported; with the
    # default ``removed_in`` the text is identical to the previous output.
    message = (
        f"{old_name} is deprecated and will be removed in PyPDF2 {removed_in}. "
        f"Use {new_name} instead."
    )
    deprecate(message, 4)


def deprecate_no_replacement(name: str) -> None:
deprecate(DEPR_MSG_NO_REPLACEMENT.format(name), 4)
def deprecate_no_replacement(name: str, removed_in: str = "3.0.0") -> None:
    """
    Emit a deprecation warning for ``name``, which has no replacement.

    :param name: Name of the deprecated API; first value formatted into
        ``DEPR_MSG_NO_REPLACEMENT``.
    :param removed_in: Version string; second value formatted into
        ``DEPR_MSG_NO_REPLACEMENT``.
    """
    message = DEPR_MSG_NO_REPLACEMENT.format(name, removed_in)
    deprecate(message, 4)
66 changes: 41 additions & 25 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from PyPDF2._utils import (
mark_location,
matrix_multiply,
read_block_backwards,
read_previous_line,
read_until_regex,
read_until_whitespace,
skip_over_comment,
skip_over_whitespace,
read_block_backwards,
read_previous_line
)
from PyPDF2.errors import PdfStreamError

Expand Down Expand Up @@ -128,9 +128,9 @@ def test_paeth_predictor(left, up, upleft, expected):
@pytest.mark.parametrize(
("dat", "pos", "to_read"),
[
(b'', 0, 1),
(b'a', 0, 1),
(b'abc', 0, 10),
(b"", 0, 1),
(b"a", 0, 1),
(b"abc", 0, 10),
],
)
def test_read_block_backwards_errs(dat, pos, to_read):
Expand All @@ -143,13 +143,13 @@ def test_read_block_backwards_errs(dat, pos, to_read):
@pytest.mark.parametrize(
("dat", "pos", "to_read", "expected", "expected_pos"),
[
(b'abc', 1, 0, b'', 1),
(b'abc', 1, 1, b'a', 0),
(b'abc', 2, 1, b'b', 1),
(b'abc', 2, 2, b'ab', 0),
(b'abc', 3, 1, b'c', 2),
(b'abc', 3, 2, b'bc', 1),
(b'abc', 3, 3, b'abc', 0),
(b"abc", 1, 0, b"", 1),
(b"abc", 1, 1, b"a", 0),
(b"abc", 2, 1, b"b", 1),
(b"abc", 2, 2, b"ab", 0),
(b"abc", 3, 1, b"c", 2),
(b"abc", 3, 2, b"bc", 1),
(b"abc", 3, 3, b"abc", 0),
],
)
def test_read_block_backwards(dat, pos, to_read, expected, expected_pos):
Expand All @@ -160,30 +160,46 @@ def test_read_block_backwards(dat, pos, to_read, expected, expected_pos):


def test_read_block_backwards_at_start():
s = io.BytesIO(b'abc')
s = io.BytesIO(b"abc")
with pytest.raises(PdfStreamError) as _:
read_previous_line(s)


@pytest.mark.parametrize(
("dat", "pos", "expected", "expected_pos"),
[
(b'abc', 1, b'a', 0),
(b'abc', 2, b'ab', 0),
(b'abc', 3, b'abc', 0),
(b'abc\n', 3, b'abc', 0),
(b'abc\n', 4, b'', 3),
(b'abc\n\r', 4, b'', 3),
(b'abc\nd', 5, b'd', 3),
(b"abc", 1, b"a", 0),
(b"abc", 2, b"ab", 0),
(b"abc", 3, b"abc", 0),
(b"abc\n", 3, b"abc", 0),
(b"abc\n", 4, b"", 3),
(b"abc\n\r", 4, b"", 3),
(b"abc\nd", 5, b"d", 3),
# Skip over multiple CR/LF bytes
(b'abc\n\r\ndef', 9, b'def', 3),
(b"abc\n\r\ndef", 9, b"def", 3),
# Include a block full of newlines...
(b'abc' + b'\n' * (2 * io.DEFAULT_BUFFER_SIZE) + b'd', 2 * io.DEFAULT_BUFFER_SIZE + 4, b'd', 3),
(
b"abc" + b"\n" * (2 * io.DEFAULT_BUFFER_SIZE) + b"d",
2 * io.DEFAULT_BUFFER_SIZE + 4,
b"d",
3,
),
# Include a block full of non-newline characters
(b'abc\n' + b'd' * (2 * io.DEFAULT_BUFFER_SIZE), 2 * io.DEFAULT_BUFFER_SIZE + 4, b'd' * (2 * io.DEFAULT_BUFFER_SIZE), 3),
(
b"abc\n" + b"d" * (2 * io.DEFAULT_BUFFER_SIZE),
2 * io.DEFAULT_BUFFER_SIZE + 4,
b"d" * (2 * io.DEFAULT_BUFFER_SIZE),
3,
),
# Both
(b'abcxyz' + b'\n' * (2 * io.DEFAULT_BUFFER_SIZE) + b'd' * (2 * io.DEFAULT_BUFFER_SIZE),\
4 * io.DEFAULT_BUFFER_SIZE + 6, b'd' * (2 * io.DEFAULT_BUFFER_SIZE), 6),
(
b"abcxyz"
+ b"\n" * (2 * io.DEFAULT_BUFFER_SIZE)
+ b"d" * (2 * io.DEFAULT_BUFFER_SIZE),
4 * io.DEFAULT_BUFFER_SIZE + 6,
b"d" * (2 * io.DEFAULT_BUFFER_SIZE),
6,
),
],
)
def test_read_previous_line(dat, pos, expected, expected_pos):
Expand Down

0 comments on commit 336d659

Please sign in to comment.