Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEV: Use relative imports #875

Merged
merged 2 commits into from
May 14, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
DEV: Use relative imports
This is important for some PyPDF2 developers' development workflows.
It allows them to compare changes more easily.

See #865 (comment)
MartinThoma committed May 13, 2022
commit 1492aef3198a7e8c5af0e8b4170079c0aa2280c9
12 changes: 6 additions & 6 deletions PyPDF2/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from PyPDF2._merger import PdfFileMerger
from PyPDF2._reader import PdfFileReader
from PyPDF2._version import __version__
from PyPDF2._writer import PdfFileWriter
from PyPDF2.pagerange import PageRange, parse_filename_page_ranges
from PyPDF2.papersizes import PaperSize
from ._merger import PdfFileMerger
from ._reader import PdfFileReader
from ._version import __version__
from ._writer import PdfFileWriter
from .pagerange import PageRange, parse_filename_page_ranges
from .papersizes import PaperSize

__all__ = [
"__version__",
16 changes: 8 additions & 8 deletions PyPDF2/_merger.py
Original file line number Diff line number Diff line change
@@ -28,11 +28,11 @@
from io import BytesIO, FileIO, IOBase
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast

from PyPDF2._page import PageObject
from PyPDF2._reader import PdfFileReader
from PyPDF2._writer import PdfFileWriter
from PyPDF2.constants import PagesAttributes as PA
from PyPDF2.generic import (
from ._page import PageObject
from ._reader import PdfFileReader
from ._writer import PdfFileWriter
from .constants import PagesAttributes as PA
from .generic import (
ArrayObject,
Bookmark,
Destination,
@@ -46,16 +46,16 @@
TreeObject,
createStringObject,
)
from PyPDF2.pagerange import PageRange, PageRangeSpec
from PyPDF2.types import (
from .pagerange import PageRange, PageRangeSpec
from .types import (
BookmarkTypes,
LayoutType,
OutlinesType,
PagemodeType,
ZoomArgsType,
ZoomArgType,
)
from PyPDF2.utils import StrByteType, str_
from .utils import StrByteType, str_

ERR_CLOSED_WRITER = "close() was called and thus the writer cannot be used anymore"

25 changes: 12 additions & 13 deletions PyPDF2/_page.py
Original file line number Diff line number Diff line change
@@ -42,11 +42,10 @@
cast,
)

from PyPDF2 import utils
from PyPDF2.constants import PageAttributes as PG
from PyPDF2.constants import Ressources as RES
from PyPDF2.errors import PageSizeNotDefinedError
from PyPDF2.generic import (
from .constants import PageAttributes as PG
from .constants import Ressources as RES
from .errors import PageSizeNotDefinedError
from .generic import (
ArrayObject,
ContentStream,
DictionaryObject,
@@ -58,7 +57,7 @@
RectangleObject,
TextStringObject,
)
from PyPDF2.utils import b_
from .utils import b_, matrixMultiply


def getRectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject:
@@ -115,7 +114,7 @@ def __init__(
pdf: Optional[Any] = None, # PdfFileReader
indirectRef: Optional[IndirectObject] = None,
) -> None:
from PyPDF2._reader import PdfFileReader
from ._reader import PdfFileReader

DictionaryObject.__init__(self)
self.pdf: Optional[PdfFileReader] = pdf
@@ -535,8 +534,8 @@ def mergeRotatedTranslatedPage(
[0, 0, 1],
]
rtranslation: List[List[float]] = [[1, 0, 0], [0, 1, 0], [tx, ty, 1]]
ctm = utils.matrixMultiply(translation, rotating)
ctm = utils.matrixMultiply(ctm, rtranslation)
ctm = matrixMultiply(translation, rotating)
ctm = matrixMultiply(ctm, rtranslation)

return self.mergeTransformedPage(
page2,
@@ -565,7 +564,7 @@ def mergeRotatedScaledPage(
[0, 0, 1],
]
scaling: List[List[float]] = [[scale, 0, 0], [0, scale, 0], [0, 0, 1]]
ctm = utils.matrixMultiply(rotating, scaling)
ctm = matrixMultiply(rotating, scaling)

self.mergeTransformedPage(
page2,
@@ -596,7 +595,7 @@ def mergeScaledTranslatedPage(

translation: List[List[float]] = [[1, 0, 0], [0, 1, 0], [tx, ty, 1]]
scaling: List[List[float]] = [[scale, 0, 0], [0, scale, 0], [0, 0, 1]]
ctm = utils.matrixMultiply(scaling, translation)
ctm = matrixMultiply(scaling, translation)

return self.mergeTransformedPage(
page2,
@@ -635,8 +634,8 @@ def mergeRotatedScaledTranslatedPage(
[0, 0, 1],
]
scaling: List[List[float]] = [[scale, 0, 0], [0, scale, 0], [0, 0, 1]]
ctm = utils.matrixMultiply(rotating, scaling)
ctm = utils.matrixMultiply(ctm, translation)
ctm = matrixMultiply(rotating, scaling)
ctm = matrixMultiply(ctm, translation)

self.mergeTransformedPage(
page2,
55 changes: 29 additions & 26 deletions PyPDF2/_reader.py
Original file line number Diff line number Diff line change
@@ -44,20 +44,19 @@
cast,
)

from PyPDF2 import utils
from PyPDF2._page import PageObject
from PyPDF2._security import _alg33_1, _alg34, _alg35
from PyPDF2.constants import CatalogAttributes as CA
from PyPDF2.constants import CatalogDictionary as CD
from PyPDF2.constants import Core as CO
from PyPDF2.constants import DocumentInformationAttributes as DI
from PyPDF2.constants import EncryptionDictAttributes as ED
from PyPDF2.constants import PageAttributes as PG
from PyPDF2.constants import PagesAttributes as PA
from PyPDF2.constants import StreamAttributes as SA
from PyPDF2.constants import TrailerKeys as TK
from PyPDF2.errors import PdfReadError, PdfReadWarning, PdfStreamError
from PyPDF2.generic import (
from ._page import PageObject
from ._security import _alg33_1, _alg34, _alg35
from .constants import CatalogAttributes as CA
from .constants import CatalogDictionary as CD
from .constants import Core as CO
from .constants import DocumentInformationAttributes as DI
from .constants import EncryptionDictAttributes as ED
from .constants import PageAttributes as PG
from .constants import PagesAttributes as PA
from .constants import StreamAttributes as SA
from .constants import TrailerKeys as TK
from .errors import PdfReadError, PdfReadWarning, PdfStreamError
from .generic import (
ArrayObject,
BooleanObject,
ByteStringObject,
@@ -79,15 +78,19 @@
createStringObject,
readObject,
)
from PyPDF2.types import OutlinesType
from PyPDF2.utils import (
from .types import OutlinesType
from .utils import (
RC4_encrypt,
StrByteType,
StreamType,
b_,
ord_,
readNonWhitespace,
readUntilWhitespace,
skipOverComment,
skipOverWhitespace,
)
from PyPDF2.xmp import XmpInformation
from .xmp import XmpInformation


def convertToInt(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]:
@@ -923,9 +926,9 @@ def _decryptObject(
key: Union[str, bytes],
) -> PdfObject:
if isinstance(obj, (ByteStringObject, TextStringObject)):
obj = createStringObject(utils.RC4_encrypt(key, obj.original_bytes))
obj = createStringObject(RC4_encrypt(key, obj.original_bytes))
elif isinstance(obj, StreamObject):
obj._data = utils.RC4_encrypt(key, obj._data)
obj._data = RC4_encrypt(key, obj._data)
elif isinstance(obj, DictionaryObject):
for dictkey, value in list(obj.items()):
obj[dictkey] = self._decryptObject(value, key)
@@ -940,14 +943,14 @@ def readObjectHeader(self, stream: StreamType) -> Tuple[int, int]:
# object header. In reality... some files have stupid cross reference
# tables that are off by whitespace bytes.
extra = False
utils.skipOverComment(stream)
extra |= utils.skipOverWhitespace(stream)
skipOverComment(stream)
extra |= skipOverWhitespace(stream)
stream.seek(-1, 1)
idnum = readUntilWhitespace(stream)
extra |= utils.skipOverWhitespace(stream)
extra |= skipOverWhitespace(stream)
stream.seek(-1, 1)
generation = readUntilWhitespace(stream)
extra |= utils.skipOverWhitespace(stream)
extra |= skipOverWhitespace(stream)
stream.seek(-1, 1)

# although it's not used, it might still be necessary to read
@@ -1442,14 +1445,14 @@ def _decrypt(self, password: Union[str, bytes]) -> int:
key = _alg33_1(password, rev, keylen)
real_O = cast(bytes, encrypt["/O"].getObject())
if rev == 2:
userpass = utils.RC4_encrypt(key, real_O)
userpass = RC4_encrypt(key, real_O)
else:
val = real_O
for i in range(19, -1, -1):
new_key = b_("")
for l in range(len(key)):
new_key += b_(chr(utils.ord_(key[l]) ^ i))
val = utils.RC4_encrypt(new_key, val)
new_key += b_(chr(ord_(key[l]) ^ i))
val = RC4_encrypt(new_key, val)
userpass = val
owner_password, key = self._authenticateUserPassword(userpass)
if owner_password:
15 changes: 7 additions & 8 deletions PyPDF2/_security.py
Original file line number Diff line number Diff line change
@@ -33,9 +33,8 @@
from hashlib import md5
from typing import Any, Tuple, Union

from PyPDF2 import utils
from PyPDF2.generic import BooleanObject, ByteStringObject
from PyPDF2.utils import b_, ord_, str_
from .generic import BooleanObject, ByteStringObject
from .utils import RC4_encrypt, b_, ord_, str_

# ref: pdf1.8 spec section 3.5.2 algorithm 3.2
_encryption_padding = (
@@ -106,7 +105,7 @@ def _alg33(owner_pwd: str, user_pwd: str, rev: int, keylen: int) -> bytes:
user_pwd_bytes = b_((user_pwd + str_(_encryption_padding))[:32])
# 6. Encrypt the result of step 5, using an RC4 encryption function with
# the encryption key obtained in step 4.
val = utils.RC4_encrypt(key, user_pwd_bytes)
val = RC4_encrypt(key, user_pwd_bytes)
# 7. (Revision 3 or greater) Do the following 19 times: Take the output
# from the previous invocation of the RC4 function and pass it as input to
# a new invocation of the function; use an encryption key generated by
@@ -118,7 +117,7 @@ def _alg33(owner_pwd: str, user_pwd: str, rev: int, keylen: int) -> bytes:
new_key = ""
for l in range(len(key)):
new_key += chr(ord_(key[l]) ^ i)
val = utils.RC4_encrypt(new_key, val)
val = RC4_encrypt(new_key, val)
# 8. Store the output from the final invocation of the RC4 as the value of
# the /O entry in the encryption dictionary.
return val
@@ -163,7 +162,7 @@ def _alg34(
# 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2,
# using an RC4 encryption function with the encryption key from the
# preceding step.
U = utils.RC4_encrypt(key, _encryption_padding)
U = RC4_encrypt(key, _encryption_padding)
# 3. Store the result of step 2 as the value of the /U entry in the
# encryption dictionary.
return U, key
@@ -195,7 +194,7 @@ def _alg35(
md5_hash = m.digest()
# 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
# function with the encryption key from step 1.
val = utils.RC4_encrypt(key, md5_hash)
val = RC4_encrypt(key, md5_hash)
# 5. Do the following 19 times: Take the output from the previous
# invocation of the RC4 function and pass it as input to a new invocation
# of the function; use an encryption key generated by taking each byte of
@@ -206,7 +205,7 @@ def _alg35(
new_key = b_("")
for k in key:
new_key += b_(chr(ord_(k) ^ i))
val = utils.RC4_encrypt(new_key, val)
val = RC4_encrypt(new_key, val)
# 6. Append 16 bytes of arbitrary padding to the output from the final
# invocation of the RC4 function and store the 32-byte result as the value
# of the U entry in the encryption dictionary.
26 changes: 13 additions & 13 deletions PyPDF2/_writer.py
Original file line number Diff line number Diff line change
@@ -35,17 +35,17 @@
from hashlib import md5
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast

from PyPDF2._page import PageObject
from PyPDF2._reader import PdfFileReader
from PyPDF2._security import _alg33, _alg34, _alg35
from PyPDF2.constants import CatalogAttributes as CA
from PyPDF2.constants import Core as CO
from PyPDF2.constants import EncryptionDictAttributes as ED
from PyPDF2.constants import PageAttributes as PG
from PyPDF2.constants import PagesAttributes as PA
from PyPDF2.constants import StreamAttributes as SA
from PyPDF2.constants import TrailerKeys as TK
from PyPDF2.generic import (
from ._page import PageObject
from ._reader import PdfFileReader
from ._security import _alg33, _alg34, _alg35
from .constants import CatalogAttributes as CA
from .constants import Core as CO
from .constants import EncryptionDictAttributes as ED
from .constants import PageAttributes as PG
from .constants import PagesAttributes as PA
from .constants import StreamAttributes as SA
from .constants import TrailerKeys as TK
from .generic import (
ArrayObject,
BooleanObject,
ByteStringObject,
@@ -65,7 +65,7 @@
TreeObject,
createStringObject,
)
from PyPDF2.types import (
from .types import (
BookmarkTypes,
BorderArrayType,
FitType,
@@ -74,7 +74,7 @@
ZoomArgsType,
ZoomArgType,
)
from PyPDF2.utils import StreamType, b_
from .utils import StreamType, b_

logger = logging.getLogger(__name__)

24 changes: 12 additions & 12 deletions PyPDF2/filters.py
Original file line number Diff line number Diff line change
@@ -35,22 +35,22 @@
from io import StringIO
from typing import Any, Dict, Optional, Tuple, Union

from PyPDF2.generic import ArrayObject, DictionaryObject, NameObject
from .generic import ArrayObject, DictionaryObject, NameObject

try:
from typing import Literal # type: ignore[attr-defined]
except ImportError:
from typing_extensions import Literal # type: ignore[misc]

from PyPDF2.constants import CcittFaxDecodeParameters as CCITT
from PyPDF2.constants import ColorSpaces
from PyPDF2.constants import FilterTypeAbbreviations as FTA
from PyPDF2.constants import FilterTypes as FT
from PyPDF2.constants import ImageAttributes as IA
from PyPDF2.constants import LzwFilterParameters as LZW
from PyPDF2.constants import StreamAttributes as SA
from PyPDF2.errors import PdfReadError, PdfStreamError
from PyPDF2.utils import ord_, paethPredictor
from .constants import CcittFaxDecodeParameters as CCITT
from .constants import ColorSpaces
from .constants import FilterTypeAbbreviations as FTA
from .constants import FilterTypes as FT
from .constants import ImageAttributes as IA
from .constants import LzwFilterParameters as LZW
from .constants import StreamAttributes as SA
from .errors import PdfReadError, PdfStreamError
from .utils import ord_, paethPredictor

try:
import zlib
@@ -558,7 +558,7 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]:

from PIL import Image

from PyPDF2.constants import GraphicsStateParameters as G
from .constants import GraphicsStateParameters as G

size = (x_object_obj[IA.WIDTH], x_object_obj[IA.HEIGHT])
data = x_object_obj.getData() # type: ignore
@@ -582,7 +582,7 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]:
[FT.ASCII_85_DECODE],
[FT.CCITT_FAX_DECODE],
):
from PyPDF2.utils import b_
from .utils import b_

extension = ".png"
data = b_(data)
45 changes: 23 additions & 22 deletions PyPDF2/generic.py
Original file line number Diff line number Diff line change
@@ -40,22 +40,27 @@
from io import BytesIO
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

from PyPDF2 import utils
from PyPDF2.constants import FilterTypes as FT
from PyPDF2.constants import StreamAttributes as SA
from PyPDF2.errors import (
from .constants import FilterTypes as FT
from .constants import StreamAttributes as SA
from .errors import (
STREAM_TRUNCATED_PREMATURELY,
PdfReadError,
PdfReadWarning,
PdfStreamError,
)
from PyPDF2.utils import (
from .utils import (
WHITESPACES,
RC4_encrypt,
StreamType,
b_,
bytes_type,
hexencode,
hexStr,
ord_,
readNonWhitespace,
readUntilRegex,
skipOverComment,
str_,
)

logger = logging.getLogger(__name__)
@@ -200,7 +205,7 @@ def readFromStream(
if r != b_("R"):
raise PdfReadError(
"Error reading indirect object reference at byte %s"
% utils.hexStr(stream.tell())
% hexStr(stream.tell())
)
return IndirectObject(int(idnum), int(generation), pdf)

@@ -210,7 +215,7 @@ def __new__(
cls, value: Union[str, Any] = "0", context: Optional[Any] = None
) -> "FloatObject":
try:
return decimal.Decimal.__new__(cls, utils.str_(value), context)
return decimal.Decimal.__new__(cls, str_(value), context)
except Exception:
try:
return decimal.Decimal.__new__(cls, str(value))
@@ -261,7 +266,7 @@ def writeToStream(

@staticmethod
def readFromStream(stream: StreamType) -> Union["NumberObject", FloatObject]:
num = utils.readUntilRegex(stream, NumberObject.NumberPattern)
num = readUntilRegex(stream, NumberObject.NumberPattern)
if num.find(NumberObject.ByteDot) != -1:
return FloatObject(num)
else:
@@ -366,7 +371,7 @@ def readStringFromStream(
return createStringObject(txt)


class ByteStringObject(utils.bytes_type, PdfObject): # type: ignore
class ByteStringObject(bytes_type, PdfObject): # type: ignore
"""
Represents a string object where the text encoding could not be determined.
This occurs quite often, as the PDF spec doesn't provide an alternate way to
@@ -386,7 +391,7 @@ def writeToStream(
if encryption_key:
bytearr = RC4_encrypt(encryption_key, bytearr) # type: ignore
stream.write(b_("<"))
stream.write(utils.hexencode(bytearr))
stream.write(hexencode(bytearr))
stream.write(b_(">"))


@@ -462,9 +467,7 @@ def readFromStream(stream: StreamType, pdf: Any) -> "NameObject": # PdfFileRead
name = stream.read(1)
if name != NameObject.surfix:
raise PdfReadError("name read error")
name += utils.readUntilRegex(
stream, NameObject.delimiterPattern, ignore_eof=True
)
name += readUntilRegex(stream, NameObject.delimiterPattern, ignore_eof=True)
try:
try:
ret = name.decode("utf-8")
@@ -580,7 +583,7 @@ def readUnsizedFromSteam(
if tmp != b_("<<"):
raise PdfReadError(
"Dictionary read error at byte %s: stream must begin with '<<'"
% utils.hexStr(stream.tell())
% hexStr(stream.tell())
)
data: Dict[Any, Any] = {}
while True:
@@ -608,12 +611,12 @@ def readUnsizedFromSteam(
# multiple definitions of key not permitted
raise PdfReadError(
"Multiple definitions in dictionary at byte %s for key %s"
% (utils.hexStr(stream.tell()), key)
% (hexStr(stream.tell()), key)
)
else:
warnings.warn(
"Multiple definitions in dictionary at byte %s for key %s"
% (utils.hexStr(stream.tell()), key),
% (hexStr(stream.tell()), key),
PdfReadWarning,
)

@@ -664,7 +667,7 @@ def readUnsizedFromSteam(
stream.seek(pos, 0)
raise PdfReadError(
"Unable to find 'endstream' marker after stream at byte %s."
% utils.hexStr(stream.tell())
% hexStr(stream.tell())
)
else:
stream.seek(pos, 0)
@@ -932,9 +935,7 @@ def __parseContentStream(self, stream: StreamType) -> None:
break
stream.seek(-1, 1)
if peek.isalpha() or peek == b_("'") or peek == b_('"'):
operator = utils.readUntilRegex(
stream, NameObject.delimiterPattern, True
)
operator = readUntilRegex(stream, NameObject.delimiterPattern, True)
if operator == b_("BI"):
# begin inline image - a completely different parsing
# mechanism is required, of course... thanks buddy...
@@ -1000,7 +1001,7 @@ def _readInlineImage(self, stream: StreamType) -> Dict[str, Any]:
info = tok + tok2
# We need to find whitespace between EI and Q.
has_q_whitespace = False
while tok3 in utils.WHITESPACES:
while tok3 in WHITESPACES:
has_q_whitespace = True
info += tok3
tok3 = stream.read(1)
@@ -1501,7 +1502,7 @@ def createStringObject(
"""
if isinstance(string, str):
return TextStringObject(string)
elif isinstance(string, utils.bytes_type):
elif isinstance(string, bytes_type):
try:
if string.startswith(codecs.BOM_UTF16_BE):
retval = TextStringObject(string.decode("utf-16"))
2 changes: 1 addition & 1 deletion PyPDF2/utils.py
Original file line number Diff line number Diff line change
@@ -34,7 +34,7 @@
from io import BufferedReader, BufferedWriter, BytesIO, FileIO
from typing import Any, Dict, List, Optional, Union, overload

from PyPDF2.errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError
from .errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError

bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
StreamType = Union[BytesIO, BufferedReader, BufferedWriter, FileIO]
6 changes: 3 additions & 3 deletions PyPDF2/xmp.py
Original file line number Diff line number Diff line change
@@ -6,8 +6,8 @@
from xml.dom.minidom import Element as XmlElement
from xml.dom.minidom import parseString

from PyPDF2.generic import PdfObject
from PyPDF2.utils import StreamType
from .generic import PdfObject
from .utils import StreamType

RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
@@ -188,7 +188,7 @@ class XmpInformation(PdfObject):
Usually accessed by :meth:`getXmpMetadata()<PyPDF2.PdfFileReader.getXmpMetadata>`
"""

from PyPDF2.generic import ContentStream
from .generic import ContentStream

def __init__(self, stream: ContentStream) -> None:
self.stream = stream