From 91b6dcd7c19e226f3bc78ba0661f4811a4239655 Mon Sep 17 00:00:00 2001
From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com>
Date: Sun, 24 Sep 2023 11:39:38 +0200
Subject: [PATCH] BUG: Fix test_watermarking_reportlab_rendering() (#2203)

This fixes the issue spotted in #2191

The solution was to re-introduce calls to `PageObject._push_pop_gs()`, in `PageObject._merge_page` & `PageObject._merge_page_writer()`, but to optimize `PageObject._push_pop_gs()` by introducing a `ContentStream.isolate_graphics_state()` method.
---
 pypdf/_page.py                    | 26 +++++---------------------
 pypdf/_utils.py                   |  4 ++--
 pypdf/generic/_data_structures.py |  7 +++++++
 tests/test_writer.py              |  6 +-----
 4 files changed, 15 insertions(+), 28 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index 55054c47b..32720992a 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -880,23 +880,6 @@ def _content_stream_rename(
                 raise KeyError(f"type of operands is {type(operands)}")
         return stream
 
-    @staticmethod
-    def _push_pop_gs(
-        contents: Any,
-        pdf: Union[None, PdfReaderProtocol, PdfWriterProtocol],
-        use_original: bool = True,
-    ) -> ContentStream:
-        # adds a graphics state "push" and "pop" to the beginning and end
-        # of a content stream. This isolates it from changes such as
-        # transformation matricies.
-        if use_original:
-            stream = contents
-        else:
-            stream = ContentStream(contents, pdf)
-        stream.operations.insert(0, ([], "q"))
-        stream.operations.append(([], "Q"))
-        return stream
-
     @staticmethod
     def _add_transformation_matrix(
         contents: Any,
@@ -1127,6 +1110,7 @@ def _merge_page(
         new_content_array = ArrayObject()
         original_content = self.get_contents()
         if original_content is not None:
+            original_content.isolate_graphics_state()
             new_content_array.append(original_content)
 
         page2content = page2.get_contents()
@@ -1154,7 +1138,7 @@ def _merge_page(
             page2content = PageObject._content_stream_rename(
                 page2content, rename, self.pdf
             )
-            page2content = PageObject._push_pop_gs(page2content, self.pdf)
+            page2content.isolate_graphics_state()
             if over:
                 new_content_array.append(page2content)
             else:
@@ -1262,9 +1246,9 @@ def _merge_page_writer(
                 pass
 
         new_content_array = ArrayObject()
-
         original_content = self.get_contents()
         if original_content is not None:
+            original_content.isolate_graphics_state()
             new_content_array.append(original_content)
 
         page2content = page2.get_contents()
@@ -1292,7 +1276,7 @@ def _merge_page_writer(
             page2content = PageObject._content_stream_rename(
                 page2content, rename, self.pdf
             )
-            page2content = PageObject._push_pop_gs(page2content, self.pdf)
+            page2content.isolate_graphics_state()
             if over:
                 new_content_array.append(page2content)
             else:
@@ -1642,7 +1626,7 @@ def add_transformation(
         content = self.get_contents()
         if content is not None:
             content = PageObject._add_transformation_matrix(content, self.pdf, ctm)
-            content = PageObject._push_pop_gs(content, self.pdf)
+            content.isolate_graphics_state()
             self.replace_contents(content)
         # if expanding the page to fit a new page, calculate the new media box size
         if expand:
diff --git a/pypdf/_utils.py b/pypdf/_utils.py
index 34944b1bb..0bc12c0bf 100644
--- a/pypdf/_utils.py
+++ b/pypdf/_utils.py
@@ -328,11 +328,11 @@ def mark_location(stream: StreamType) -> None:
 
 
 def b_(s: Union[str, bytes]) -> bytes:
+    if isinstance(s, bytes):
+        return s
     bc = B_CACHE
     if s in bc:
         return bc[s]
-    if isinstance(s, bytes):
-        return s
     try:
         r = s.encode("latin-1")
         if len(s) < 2:
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 9ad98c240..88a17d85a 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1229,6 +1229,13 @@ def operations(self, operations: List[Tuple[Any, Any]]) -> None:
         self._operations = operations
         self._data = b""
 
+    def isolate_graphics_state(self) -> None:
+        if self._operations:
+            self._operations.insert(0, ([], "q"))
+            self._operations.append(([], "Q"))
+        elif self._data:
+            self._data = b"q\n" + b_(self._data) + b"Q\n"
+
     # This overrides the parent method:
     def write_to_stream(
         self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
diff --git a/tests/test_writer.py b/tests/test_writer.py
index fac99a8f2..70352833f 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1619,10 +1619,9 @@ def test_watermark_rendering(tmp_path):
     assert image_similarity(png_path, target_png_path) >= 0.95
 
 
-@pytest.mark.xfail(reason="issue introduced with pypdf==3.15.4")
 def test_watermarking_reportlab_rendering(tmp_path):
     """
-    This test shows that the merged page is rotated+mirrored.
+    This test is showing a rotated+mirrored watermark in pypdf==3.15.4.
 
     Replacing the generate_base with e.g. the crazyones did not show the issue.
     """
@@ -1637,9 +1636,6 @@ def test_watermarking_reportlab_rendering(tmp_path):
     base_page.merge_page(watermark)
     writer.add_page(base_page)
 
-    for page in writer.pages:
-        page.compress_content_streams()
-
     target_png_path = RESOURCE_ROOT / "test_watermarking_reportlab_rendering.png"
     pdf_path = tmp_path / "out.pdf"
     png_path = tmp_path / "test_watermarking_reportlab_rendering.png"