From 4429066367248fc4786a0d3c6ba59427b4b8bf63 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sat, 14 May 2022 15:24:05 +0200 Subject: [PATCH] DEV: Use relative imports (#875) This is important for some PyPDF2 developers development workflows. It allows them to compare changes more easily. Additionally, some imports were moved from function-level to module-level. See https://github.com/py-pdf/PyPDF2/pull/865#issuecomment-1125225910 Co-authored-by: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> --- PyPDF2/__init__.py | 12 +++++----- PyPDF2/_merger.py | 16 ++++++------- PyPDF2/_page.py | 25 ++++++++++---------- PyPDF2/_reader.py | 57 ++++++++++++++++++++++++--------------------- PyPDF2/_security.py | 15 ++++++------ PyPDF2/_writer.py | 32 ++++++++++++------------- PyPDF2/filters.py | 33 +++++++++++--------------- PyPDF2/generic.py | 56 +++++++++++++++++++++++--------------------- PyPDF2/pagerange.py | 2 +- PyPDF2/types.py | 2 +- PyPDF2/utils.py | 6 ++--- PyPDF2/xmp.py | 6 ++--- 12 files changed, 129 insertions(+), 133 deletions(-) diff --git a/PyPDF2/__init__.py b/PyPDF2/__init__.py index 67a30d93a..0b29ce250 100644 --- a/PyPDF2/__init__.py +++ b/PyPDF2/__init__.py @@ -1,9 +1,9 @@ -from PyPDF2._merger import PdfFileMerger -from PyPDF2._reader import PdfFileReader -from PyPDF2._version import __version__ -from PyPDF2._writer import PdfFileWriter -from PyPDF2.pagerange import PageRange, parse_filename_page_ranges -from PyPDF2.papersizes import PaperSize +from ._merger import PdfFileMerger +from ._reader import PdfFileReader +from ._version import __version__ +from ._writer import PdfFileWriter +from .pagerange import PageRange, parse_filename_page_ranges +from .papersizes import PaperSize __all__ = [ "__version__", diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 4f76950bc..cc8ef12d7 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -28,11 +28,11 @@ from io import BytesIO, FileIO, IOBase from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast -from PyPDF2._page import PageObject -from PyPDF2._reader import PdfFileReader -from PyPDF2._writer import PdfFileWriter -from PyPDF2.constants import PagesAttributes as PA -from PyPDF2.generic import ( +from ._page import PageObject +from ._reader import PdfFileReader +from ._writer import PdfFileWriter +from .constants import PagesAttributes as PA +from .generic import ( ArrayObject, Bookmark, Destination, @@ -46,8 +46,8 @@ TreeObject, createStringObject, ) -from PyPDF2.pagerange import PageRange, PageRangeSpec -from PyPDF2.types import ( +from .pagerange import PageRange, PageRangeSpec +from .types import ( BookmarkTypes, LayoutType, OutlinesType, @@ -55,7 +55,7 @@ ZoomArgsType, ZoomArgType, ) -from PyPDF2.utils import StrByteType, str_ +from .utils import StrByteType, str_ ERR_CLOSED_WRITER = "close() was called and thus the writer cannot be used anymore" diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py index 39cf4ef3a..01b45d8a1 100644 --- a/PyPDF2/_page.py +++ b/PyPDF2/_page.py @@ -42,11 +42,10 @@ cast, ) -from PyPDF2 import utils -from PyPDF2.constants import PageAttributes as PG -from PyPDF2.constants import Ressources as RES -from PyPDF2.errors import PageSizeNotDefinedError -from PyPDF2.generic import ( +from .constants import PageAttributes as PG +from .constants import Ressources as RES +from .errors import PageSizeNotDefinedError +from .generic import ( ArrayObject, ContentStream, DictionaryObject, @@ -58,7 +57,7 @@ RectangleObject, TextStringObject, ) -from PyPDF2.utils import b_ +from .utils import b_, matrixMultiply def getRectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject: @@ -115,7 +114,7 @@ def __init__( pdf: Optional[Any] = None, # PdfFileReader indirectRef: Optional[IndirectObject] = None, ) -> None: - from PyPDF2._reader import PdfFileReader + from ._reader import PdfFileReader DictionaryObject.__init__(self) self.pdf: Optional[PdfFileReader] = pdf @@ -535,8 +534,8 @@ def mergeRotatedTranslatedPage( [0, 0, 1], ] rtranslation: List[List[float]] = [[1, 0, 0], [0, 1, 0], [tx, ty, 1]] - ctm = utils.matrixMultiply(translation, rotating) - ctm = utils.matrixMultiply(ctm, rtranslation) + ctm = matrixMultiply(translation, rotating) + ctm = matrixMultiply(ctm, rtranslation) return self.mergeTransformedPage( page2, @@ -565,7 +564,7 @@ def mergeRotatedScaledPage( [0, 0, 1], ] scaling: List[List[float]] = [[scale, 0, 0], [0, scale, 0], [0, 0, 1]] - ctm = utils.matrixMultiply(rotating, scaling) + ctm = matrixMultiply(rotating, scaling) self.mergeTransformedPage( page2, @@ -596,7 +595,7 @@ def mergeScaledTranslatedPage( translation: List[List[float]] = [[1, 0, 0], [0, 1, 0], [tx, ty, 1]] scaling: List[List[float]] = [[scale, 0, 0], [0, scale, 0], [0, 0, 1]] - ctm = utils.matrixMultiply(scaling, translation) + ctm = matrixMultiply(scaling, translation) return self.mergeTransformedPage( page2, @@ -635,8 +634,8 @@ def mergeRotatedScaledTranslatedPage( [0, 0, 1], ] scaling: List[List[float]] = [[scale, 0, 0], [0, scale, 0], [0, 0, 1]] - ctm = utils.matrixMultiply(rotating, scaling) - ctm = utils.matrixMultiply(ctm, translation) + ctm = matrixMultiply(rotating, scaling) + ctm = matrixMultiply(ctm, translation) self.mergeTransformedPage( page2, diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py index 5de83f009..479f416d4 100644 --- a/PyPDF2/_reader.py +++ b/PyPDF2/_reader.py @@ -28,6 +28,7 @@ # POSSIBILITY OF SUCH DAMAGE. import struct +import re import warnings from hashlib import md5 from io import BytesIO @@ -44,20 +45,19 @@ cast, ) -from PyPDF2 import utils -from PyPDF2._page import PageObject -from PyPDF2._security import _alg33_1, _alg34, _alg35 -from PyPDF2.constants import CatalogAttributes as CA -from PyPDF2.constants import CatalogDictionary as CD -from PyPDF2.constants import Core as CO -from PyPDF2.constants import DocumentInformationAttributes as DI -from PyPDF2.constants import EncryptionDictAttributes as ED -from PyPDF2.constants import PageAttributes as PG -from PyPDF2.constants import PagesAttributes as PA -from PyPDF2.constants import StreamAttributes as SA -from PyPDF2.constants import TrailerKeys as TK -from PyPDF2.errors import PdfReadError, PdfReadWarning, PdfStreamError -from PyPDF2.generic import ( +from ._page import PageObject +from ._security import _alg33_1, _alg34, _alg35 +from .constants import CatalogAttributes as CA +from .constants import CatalogDictionary as CD +from .constants import Core as CO +from .constants import DocumentInformationAttributes as DI +from .constants import EncryptionDictAttributes as ED +from .constants import PageAttributes as PG +from .constants import PagesAttributes as PA +from .constants import StreamAttributes as SA +from .constants import TrailerKeys as TK +from .errors import PdfReadError, PdfReadWarning, PdfStreamError +from .generic import ( ArrayObject, BooleanObject, ByteStringObject, @@ -79,15 +79,19 @@ createStringObject, readObject, ) -from PyPDF2.types import OutlinesType -from PyPDF2.utils import ( +from .types import OutlinesType +from .utils import ( + RC4_encrypt, StrByteType, StreamType, b_, + ord_, readNonWhitespace, readUntilWhitespace, + skipOverComment, + skipOverWhitespace, ) -from PyPDF2.xmp import XmpInformation +from .xmp import XmpInformation def convertToInt(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]: @@ -923,9 +927,9 @@ def _decryptObject( key: Union[str, bytes], ) -> PdfObject: if isinstance(obj, (ByteStringObject, TextStringObject)): - obj = createStringObject(utils.RC4_encrypt(key, obj.original_bytes)) + obj = createStringObject(RC4_encrypt(key, obj.original_bytes)) elif isinstance(obj, StreamObject): - obj._data = utils.RC4_encrypt(key, obj._data) + obj._data = RC4_encrypt(key, obj._data) elif isinstance(obj, DictionaryObject): for dictkey, value in list(obj.items()): obj[dictkey] = self._decryptObject(value, key) @@ -940,14 +944,14 @@ def readObjectHeader(self, stream: StreamType) -> Tuple[int, int]: # object header. In reality... some files have stupid cross reference # tables that are off by whitespace bytes. extra = False - utils.skipOverComment(stream) - extra |= utils.skipOverWhitespace(stream) + skipOverComment(stream) + extra |= skipOverWhitespace(stream) stream.seek(-1, 1) idnum = readUntilWhitespace(stream) - extra |= utils.skipOverWhitespace(stream) + extra |= skipOverWhitespace(stream) stream.seek(-1, 1) generation = readUntilWhitespace(stream) - extra |= utils.skipOverWhitespace(stream) + extra |= skipOverWhitespace(stream) stream.seek(-1, 1) # although it's not used, it might still be necessary to read @@ -1263,7 +1267,6 @@ def _rebuild_xref_table(self, stream: StreamType) -> None: self.xref = {} stream.seek(0, 0) f_ = stream.read(-1) - import re for m in re.finditer(b_(r"[\r\n \t][ \t]*(\d+)[ \t]+(\d+)[ \t]+obj"), f_): idnum = int(m.group(1)) @@ -1442,14 +1445,14 @@ def _decrypt(self, password: Union[str, bytes]) -> int: key = _alg33_1(password, rev, keylen) real_O = cast(bytes, encrypt["/O"].getObject()) if rev == 2: - userpass = utils.RC4_encrypt(key, real_O) + userpass = RC4_encrypt(key, real_O) else: val = real_O for i in range(19, -1, -1): new_key = b_("") for l in range(len(key)): - new_key += b_(chr(utils.ord_(key[l]) ^ i)) - val = utils.RC4_encrypt(new_key, val) + new_key += b_(chr(ord_(key[l]) ^ i)) + val = RC4_encrypt(new_key, val) userpass = val owner_password, key = self._authenticateUserPassword(userpass) if owner_password: diff --git a/PyPDF2/_security.py b/PyPDF2/_security.py index d2c8b7c2f..15c8c8c00 100644 --- a/PyPDF2/_security.py +++ b/PyPDF2/_security.py @@ -33,9 +33,8 @@ from hashlib import md5 from typing import Any, Tuple, Union -from PyPDF2 import utils -from PyPDF2.generic import BooleanObject, ByteStringObject -from PyPDF2.utils import b_, ord_, str_ +from .generic import BooleanObject, ByteStringObject +from .utils import RC4_encrypt, b_, ord_, str_ # ref: pdf1.8 spec section 3.5.2 algorithm 3.2 _encryption_padding = ( @@ -106,7 +105,7 @@ def _alg33(owner_pwd: str, user_pwd: str, rev: int, keylen: int) -> bytes: user_pwd_bytes = b_((user_pwd + str_(_encryption_padding))[:32]) # 6. Encrypt the result of step 5, using an RC4 encryption function with # the encryption key obtained in step 4. - val = utils.RC4_encrypt(key, user_pwd_bytes) + val = RC4_encrypt(key, user_pwd_bytes) # 7. (Revision 3 or greater) Do the following 19 times: Take the output # from the previous invocation of the RC4 function and pass it as input to # a new invocation of the function; use an encryption key generated by @@ -118,7 +117,7 @@ def _alg33(owner_pwd: str, user_pwd: str, rev: int, keylen: int) -> bytes: new_key = "" for l in range(len(key)): new_key += chr(ord_(key[l]) ^ i) - val = utils.RC4_encrypt(new_key, val) + val = RC4_encrypt(new_key, val) # 8. Store the output from the final invocation of the RC4 as the value of # the /O entry in the encryption dictionary. return val @@ -163,7 +162,7 @@ def _alg34( # 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2, # using an RC4 encryption function with the encryption key from the # preceding step. - U = utils.RC4_encrypt(key, _encryption_padding) + U = RC4_encrypt(key, _encryption_padding) # 3. Store the result of step 2 as the value of the /U entry in the # encryption dictionary. return U, key @@ -195,7 +194,7 @@ def _alg35( md5_hash = m.digest() # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption # function with the encryption key from step 1. - val = utils.RC4_encrypt(key, md5_hash) + val = RC4_encrypt(key, md5_hash) # 5. Do the following 19 times: Take the output from the previous # invocation of the RC4 function and pass it as input to a new invocation # of the function; use an encryption key generated by taking each byte of @@ -206,7 +205,7 @@ def _alg35( new_key = b_("") for k in key: new_key += b_(chr(ord_(k) ^ i)) - val = utils.RC4_encrypt(new_key, val) + val = RC4_encrypt(new_key, val) # 6. Append 16 bytes of arbitrary padding to the output from the final # invocation of the RC4 function and store the 32-byte result as the value # of the U entry in the encryption dictionary. diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 122df7e42..fcd3c97d8 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -32,20 +32,23 @@ import struct import uuid import warnings +import random +import time + from hashlib import md5 from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast -from PyPDF2._page import PageObject -from PyPDF2._reader import PdfFileReader -from PyPDF2._security import _alg33, _alg34, _alg35 -from PyPDF2.constants import CatalogAttributes as CA -from PyPDF2.constants import Core as CO -from PyPDF2.constants import EncryptionDictAttributes as ED -from PyPDF2.constants import PageAttributes as PG -from PyPDF2.constants import PagesAttributes as PA -from PyPDF2.constants import StreamAttributes as SA -from PyPDF2.constants import TrailerKeys as TK -from PyPDF2.generic import ( +from ._page import PageObject +from ._reader import PdfFileReader +from ._security import _alg33, _alg34, _alg35 +from .constants import CatalogAttributes as CA +from .constants import Core as CO +from .constants import EncryptionDictAttributes as ED +from .constants import PageAttributes as PG +from .constants import PagesAttributes as PA +from .constants import StreamAttributes as SA +from .constants import TrailerKeys as TK +from .generic import ( ArrayObject, BooleanObject, ByteStringObject, @@ -65,7 +68,7 @@ TreeObject, createStringObject, ) -from PyPDF2.types import ( +from .types import ( BookmarkTypes, BorderArrayType, FitType, @@ -74,7 +77,7 @@ ZoomArgsType, ZoomArgType, ) -from PyPDF2.utils import StreamType, b_ +from .utils import StreamType, b_ logger = logging.getLogger(__name__) @@ -496,9 +499,6 @@ def encrypt( control annotations, 9 for form fields, 10 for extraction of text and graphics. """ - import random - import time - if owner_pwd is None: owner_pwd = user_pwd if use_128bit: diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index 6f1df0294..968a32321 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -32,26 +32,27 @@ import math import struct -from io import StringIO +from io import StringIO,BytesIO from typing import Any, Dict, Optional, Tuple, Union import zlib -from PyPDF2.generic import ArrayObject, DictionaryObject, NameObject +from .generic import ArrayObject, DictionaryObject, NameObject try: from typing import Literal # type: ignore[attr-defined] except ImportError: from typing_extensions import Literal # type: ignore[misc] -from PyPDF2.constants import CcittFaxDecodeParameters as CCITT -from PyPDF2.constants import ColorSpaces -from PyPDF2.constants import FilterTypeAbbreviations as FTA -from PyPDF2.constants import FilterTypes as FT -from PyPDF2.constants import ImageAttributes as IA -from PyPDF2.constants import LzwFilterParameters as LZW -from PyPDF2.constants import StreamAttributes as SA -from PyPDF2.errors import PdfReadError, PdfStreamError -from PyPDF2.utils import ord_, paethPredictor +from .constants import CcittFaxDecodeParameters as CCITT +from .constants import ColorSpaces +from .constants import FilterTypeAbbreviations as FTA +from .constants import FilterTypes as FT +from .constants import ImageAttributes as IA +from .constants import LzwFilterParameters as LZW +from .constants import StreamAttributes as SA +from .constants import GraphicsStateParameters as G +from .errors import PdfReadError, PdfStreamError +from .utils import b_,ord_, paethPredictor def decompress(data: bytes) -> bytes: @@ -498,12 +499,8 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]: :return: Tuple[file extension, bytes] """ - import io - from PIL import Image - from PyPDF2.constants import GraphicsStateParameters as G - size = (x_object_obj[IA.WIDTH], x_object_obj[IA.HEIGHT]) data = x_object_obj.getData() # type: ignore if x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB: @@ -518,7 +515,7 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]: if G.S_MASK in x_object_obj: # add alpha channel alpha = Image.frombytes("L", size, x_object_obj[G.S_MASK].getData()) img.putalpha(alpha) - img_byte_arr = io.BytesIO() + img_byte_arr = BytesIO() img.save(img_byte_arr, format="PNG") data = img_byte_arr.getvalue() elif x_object_obj[SA.FILTER] in ( @@ -526,8 +523,6 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]: [FT.ASCII_85_DECODE], [FT.CCITT_FAX_DECODE], ): - from PyPDF2.utils import b_ - extension = ".png" data = b_(data) elif x_object_obj[SA.FILTER] == FT.DCT_DECODE: @@ -539,7 +534,7 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]: else: extension = ".png" img = Image.frombytes(mode, size, data) - img_byte_arr = io.BytesIO() + img_byte_arr = BytesIO() img.save(img_byte_arr, format="PNG") data = img_byte_arr.getvalue() diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index 0e3ec68a5..f275a5286 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -40,22 +40,30 @@ from io import BytesIO from typing import Any, Dict, Iterable, List, Optional, Tuple, Union -from PyPDF2 import utils -from PyPDF2.constants import FilterTypes as FT -from PyPDF2.constants import StreamAttributes as SA -from PyPDF2.errors import ( +from .constants import FilterTypes as FT +from .constants import StreamAttributes as SA +from .constants import TypArguments as TA +from .constants import TypFitArguments as TF + +from .errors import ( STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfReadWarning, PdfStreamError, ) -from PyPDF2.utils import ( +from .utils import ( + WHITESPACES, RC4_encrypt, StreamType, b_, + bytes_type, + hexencode, + hexStr, ord_, readNonWhitespace, + readUntilRegex, skipOverComment, + str_, ) logger = logging.getLogger(__name__) @@ -200,7 +208,7 @@ def readFromStream( if r != b_("R"): raise PdfReadError( "Error reading indirect object reference at byte %s" - % utils.hexStr(stream.tell()) + % hexStr(stream.tell()) ) return IndirectObject(int(idnum), int(generation), pdf) @@ -210,7 +218,7 @@ def __new__( cls, value: Union[str, Any] = "0", context: Optional[Any] = None ) -> "FloatObject": try: - return decimal.Decimal.__new__(cls, utils.str_(value), context) + return decimal.Decimal.__new__(cls, str_(value), context) except Exception: try: return decimal.Decimal.__new__(cls, str(value)) @@ -261,7 +269,7 @@ def writeToStream( @staticmethod def readFromStream(stream: StreamType) -> Union["NumberObject", FloatObject]: - num = utils.readUntilRegex(stream, NumberObject.NumberPattern) + num = readUntilRegex(stream, NumberObject.NumberPattern) if num.find(NumberObject.ByteDot) != -1: return FloatObject(num) else: @@ -366,7 +374,7 @@ def readStringFromStream( return createStringObject(txt) -class ByteStringObject(utils.bytes_type, PdfObject): # type: ignore +class ByteStringObject(bytes_type, PdfObject): # type: ignore """ Represents a string object where the text encoding could not be determined. This occurs quite often, as the PDF spec doesn't provide an alternate way to @@ -386,7 +394,7 @@ def writeToStream( if encryption_key: bytearr = RC4_encrypt(encryption_key, bytearr) # type: ignore stream.write(b_("<")) - stream.write(utils.hexencode(bytearr)) + stream.write(hexencode(bytearr)) stream.write(b_(">")) @@ -462,9 +470,7 @@ def readFromStream(stream: StreamType, pdf: Any) -> "NameObject": # PdfFileRead name = stream.read(1) if name != NameObject.surfix: raise PdfReadError("name read error") - name += utils.readUntilRegex( - stream, NameObject.delimiterPattern, ignore_eof=True - ) + name += readUntilRegex(stream, NameObject.delimiterPattern, ignore_eof=True) try: try: ret = name.decode("utf-8") @@ -512,7 +518,7 @@ def getXmpMetadata(self) -> Optional[PdfObject]: # XmpInformation that can be used to access XMP metadata from the document. Can also return None if no metadata was found on the document root. """ - from PyPDF2.xmp import XmpInformation + from .xmp import XmpInformation metadata = self.get("/Metadata", None) if metadata is None: @@ -580,7 +586,7 @@ def readUnsizedFromSteam( if tmp != b_("<<"): raise PdfReadError( "Dictionary read error at byte %s: stream must begin with '<<'" - % utils.hexStr(stream.tell()) + % hexStr(stream.tell()) ) data: Dict[Any, Any] = {} while True: @@ -608,12 +614,12 @@ def readUnsizedFromSteam( # multiple definitions of key not permitted raise PdfReadError( "Multiple definitions in dictionary at byte %s for key %s" - % (utils.hexStr(stream.tell()), key) + % (hexStr(stream.tell()), key) ) else: warnings.warn( "Multiple definitions in dictionary at byte %s for key %s" - % (utils.hexStr(stream.tell()), key), + % (hexStr(stream.tell()), key), PdfReadWarning, ) @@ -664,7 +670,7 @@ def readUnsizedFromSteam( stream.seek(pos, 0) raise PdfReadError( "Unable to find 'endstream' marker after stream at byte %s." - % utils.hexStr(stream.tell()) + % hexStr(stream.tell()) ) else: stream.seek(pos, 0) @@ -849,7 +855,7 @@ def initializeFromDictionary( return retval def flateEncode(self) -> "EncodedStreamObject": - from PyPDF2.filters import FlateDecode + from .filters import FlateDecode if SA.FILTER in self: f = self[SA.FILTER] @@ -881,7 +887,7 @@ def __init__(self) -> None: self.decodedSelf: Optional[DecodedStreamObject] = None def getData(self) -> Union[None, str, bytes]: - from PyPDF2.filters import decodeStreamData + from .filters import decodeStreamData if self.decodedSelf: # cached version of decoded object @@ -932,9 +938,7 @@ def __parseContentStream(self, stream: StreamType) -> None: break stream.seek(-1, 1) if peek.isalpha() or peek == b_("'") or peek == b_('"'): - operator = utils.readUntilRegex( - stream, NameObject.delimiterPattern, True - ) + operator = readUntilRegex(stream, NameObject.delimiterPattern, True) if operator == b_("BI"): # begin inline image - a completely different parsing # mechanism is required, of course... thanks buddy... @@ -1000,7 +1004,7 @@ def _readInlineImage(self, stream: StreamType) -> Dict[str, Any]: info = tok + tok2 # We need to find whitespace between EI and Q. has_q_whitespace = False - while tok3 in utils.WHITESPACES: + while tok3 in WHITESPACES: has_q_whitespace = True info += tok3 tok3 = stream.read(1) @@ -1336,8 +1340,6 @@ def __init__( self[NameObject("/Page")] = page self[NameObject("/Type")] = typ - from PyPDF2.constants import TypArguments as TA - from PyPDF2.constants import TypFitArguments as TF # from table 8.2 of the PDF 1.7 reference. if typ == "/XYZ": @@ -1501,7 +1503,7 @@ def createStringObject( """ if isinstance(string, str): return TextStringObject(string) - elif isinstance(string, utils.bytes_type): + elif isinstance(string, bytes_type): try: if string.startswith(codecs.BOM_UTF16_BE): retval = TextStringObject(string.decode("utf-16")) diff --git a/PyPDF2/pagerange.py b/PyPDF2/pagerange.py index f31fa2396..cb990f7f3 100644 --- a/PyPDF2/pagerange.py +++ b/PyPDF2/pagerange.py @@ -10,7 +10,7 @@ import re from typing import Any, List, Tuple, Union -from PyPDF2.errors import ParseError +from .errors import ParseError _INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0". PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE) diff --git a/PyPDF2/types.py b/PyPDF2/types.py index 67f14f1c0..8d9eb9d46 100644 --- a/PyPDF2/types.py +++ b/PyPDF2/types.py @@ -14,7 +14,7 @@ except ImportError: from typing_extensions import TypeAlias # type: ignore[misc] -from PyPDF2.generic import ( +from .generic import ( ArrayObject, Bookmark, Destination, diff --git a/PyPDF2/utils.py b/PyPDF2/utils.py index 381c62055..19b5533dc 100644 --- a/PyPDF2/utils.py +++ b/PyPDF2/utils.py @@ -31,10 +31,11 @@ __author__ = "Mathieu Fenniak" __author_email__ = "biziqe@mathieu.fenniak.net" +from codecs import getencoder from io import BufferedReader, BufferedWriter, BytesIO, FileIO from typing import Any, Dict, List, Optional, Union, overload -from PyPDF2.errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError +from .errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X StreamType = Union[BytesIO, BufferedReader, BufferedWriter, FileIO] @@ -211,9 +212,8 @@ def ord_(b: Union[int, str, bytes]) -> Union[int, bytes]: def hexencode(b: bytes) -> bytes: - import codecs - coder = codecs.getencoder("hex_codec") + coder = getencoder("hex_codec") coded = coder(b) # type: ignore return coded[0] diff --git a/PyPDF2/xmp.py b/PyPDF2/xmp.py index 0add70fd7..4aff89069 100644 --- a/PyPDF2/xmp.py +++ b/PyPDF2/xmp.py @@ -6,8 +6,8 @@ from xml.dom.minidom import Element as XmlElement from xml.dom.minidom import parseString -from PyPDF2.generic import PdfObject -from PyPDF2.utils import StreamType +from .generic import PdfObject,ContentStream +from .utils import StreamType RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" DC_NAMESPACE = "http://purl.org/dc/elements/1.1/" @@ -188,8 +188,6 @@ class XmpInformation(PdfObject): Usually accessed by :meth:`getXmpMetadata()` """ - from PyPDF2.generic import ContentStream - def __init__(self, stream: ContentStream) -> None: self.stream = stream doc_root: Document = parseString(self.stream.getData())