From 2f767c0d27dc3864e08b59d5a8a2c09eef9abeca Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 10 Mar 2024 11:47:30 +0100 Subject: [PATCH 1/4] ENH: Add += and -= operators to ArrayObject add new capability required for other changes --- pypdf/generic/_data_structures.py | 44 +++++++++++++++++++++++++++++++ tests/test_generic.py | 26 ++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 3b0aede89..46c37690c 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -31,6 +31,7 @@ import logging import re +import sys from io import BytesIO from typing import ( Any, @@ -70,6 +71,7 @@ from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError from ._base import ( BooleanObject, + ByteStringObject, FloatObject, IndirectObject, NameObject, @@ -81,6 +83,11 @@ from ._fit import Fit from ._utils import read_hex_string_from_stream, read_string_from_stream +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + logger = logging.getLogger(__name__) NumberSigns = b"+-" IndirectPattern = re.compile(rb"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]") @@ -121,6 +128,43 @@ def items(self) -> Iterable[Any]: """Emulate DictionaryObject.items for a list (index, object).""" return enumerate(self) + def __to_lst(self, lst: Any) -> List[Any]: + # Convert to list, internal + if isinstance(lst, (list, tuple, set)): + pass + elif isinstance(lst, PdfObject): + lst = [lst] + elif isinstance(lst, str): + if lst[0] == "/": + lst = [NameObject(lst)] + else: + lst = [TextStringObject(lst)] + elif isinstance(lst, bytes): + lst = [ByteStringObject(lst)] + else: # for numbers,... + lst = [lst] + return lst + + def __add__(self, lst: Any) -> "ArrayObject": + """Allow extend with any list format or append""" + return ArrayObject(self) + ArrayObject(self.__to_lst(lst)) + + def __iadd__(self, lst: Any) -> Self: + """Allow extend with any list format or append""" + for x in self.__to_lst(lst): + self.append(x) + return self + + def __isub__(self, lst: Any) -> Self: + """Allow to remove items""" + for x in self.__to_lst(lst): + try: + x = self.index(x) + del self[x] + except ValueError: + pass + return self + def write_to_stream( self, stream: StreamType, encryption_key: Union[None, str, bytes] = None ) -> None: diff --git a/tests/test_generic.py b/tests/test_generic.py index f31a4f1c8..0fe5b7f5e 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -1278,3 +1278,29 @@ def test_indirect_object_page_dimensions(): reader = PdfReader(data, strict=False) mediabox = reader.pages[0].mediabox assert mediabox == RectangleObject((0, 0, 792, 612)) + + +def test_array_operators(): + a = ArrayObject( + [ + NumberObject(1), + NumberObject(2), + NumberObject(3), + NumberObject(4), + ] + ) + assert a == [1, 2, 3, 4] + a -= 2 + a += "abc" + a -= (3, 4) + a += ["d", "e"] + a += BooleanObject(True) + assert a == [1, "abc", "d", "e", True] + a += "/toto" + assert isinstance(a[-1], NameObject) + assert isinstance(a[1], TextStringObject) + a += b"1234" + assert a[-1] == ByteStringObject(b"1234") + la = len(a) + a -= 300 + assert len(a) == la From 461395551a2e59a492367f7474ffda20cc6ce817 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 10 Mar 2024 12:33:48 +0100 Subject: [PATCH 2/4] review + coverage + fix --- pypdf/generic/_data_structures.py | 36 +++++++++++++++++++++++++------ tests/test_generic.py | 3 +++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 46c37690c..04cb1ed21 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -128,7 +128,7 @@ def items(self) -> Iterable[Any]: """Emulate DictionaryObject.items for a list (index, object).""" return enumerate(self) - def __to_lst(self, lst: Any) -> List[Any]: + def _to_lst(self, lst: Any) -> List[Any]: # Convert to list, internal if isinstance(lst, (list, tuple, set)): pass @@ -146,18 +146,40 @@ def __to_lst(self, lst: Any) -> List[Any]: return lst def __add__(self, lst: Any) -> "ArrayObject": - """Allow extend with any list format or append""" - return ArrayObject(self) + ArrayObject(self.__to_lst(lst)) + """ + Allow extension by adding list or add one element only + + Args: + lst: any list, tuples are extended the list. + other types(numbers,...) will be appended. + if str is passed it will be converted into TextStringObject + or NameObject (if starting with "/") + if bytes is passed it will be converted into ByteStringObject + + Returns: + ArrayObject with all elements + """ + temp = ArrayObject(self) + temp.extend(self._to_lst(lst)) + return temp def __iadd__(self, lst: Any) -> Self: - """Allow extend with any list format or append""" - for x in self.__to_lst(lst): - self.append(x) + """ + Allow extension by adding list or add one element only + + Args: + lst: any list, tuples are extended the list. + other types(numbers,...) will be appended. + if str is passed it will be converted into TextStringObject + or NameObject (if starting with "/") + if bytes is passed it will be converted into ByteStringObject + """ + self.extend(self._to_lst(lst)) return self def __isub__(self, lst: Any) -> Self: """Allow to remove items""" - for x in self.__to_lst(lst): + for x in self._to_lst(lst): try: x = self.index(x) del self[x] diff --git a/tests/test_generic.py b/tests/test_generic.py index 0fe5b7f5e..39cf479b7 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -1289,6 +1289,9 @@ def test_array_operators(): NumberObject(4), ] ) + b = a + 5 + assert isinstance(b, ArrayObject) + assert b == [1, 2, 3, 4, 5] assert a == [1, 2, 3, 4] a -= 2 a += "abc" From 4ca01679a65095786af7f0b5c998123aca95ae33 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 10 Mar 2024 12:48:45 +0100 Subject: [PATCH 3/4] version dependencies --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e8b3f8d78..2396f6d68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ classifiers = [ ] dependencies = [ - "typing_extensions >= 3.7.4.3; python_version < '3.10'", + "typing_extensions >= 4.0; python_version < '3.10'", "dataclasses; python_version < '3.7'", ] From 028a91057c45b9ee598398c85b3966f770caf659 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Sun, 10 Mar 2024 13:12:04 +0100 Subject: [PATCH 4/4] typing.Self is only available starting with Python 3.11 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2396f6d68..eb9e8a0a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ classifiers = [ ] dependencies = [ - "typing_extensions >= 4.0; python_version < '3.10'", + "typing_extensions >= 4.0; python_version < '3.11'", "dataclasses; python_version < '3.7'", ]