diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 62d3094023..e4599bc746 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -913,16 +913,8 @@ def _sweep_indirect_references( if newobj is None: try: newobj = data.pdf.get_object(data) - hash_value = None - if newobj is not None: - hash_value = newobj.hash_value() - # Check if object is already added to pdf. - if hash_value in self._idnum_hash: - return IndirectObject(self._idnum_hash[hash_value], 0, self) self._objects.append(None) # placeholder idnum = len(self._objects) - if hash_value is not None: - self._idnum_hash[hash_value] = idnum newobj_ido = IndirectObject(idnum, 0, self) if data.pdf not in extern_map: extern_map[data.pdf] = {} diff --git a/resources/Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf b/resources/Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf new file mode 100644 index 0000000000..aff79f1a4a Binary files /dev/null and b/resources/Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf differ diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 8ad201b324..2d753382bb 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -244,6 +244,9 @@ def test_merge_with_warning(url, name): ): merger.write("tmp.merged.pdf") + # Cleanup + os.remove("tmp.merged.pdf") + @pytest.mark.parametrize( ("url", "name"), @@ -261,6 +264,9 @@ def test_merge(url, name): merger.append(reader) merger.write("tmp.merged.pdf") + # Cleanup + os.remove("tmp.merged.pdf") + @pytest.mark.parametrize( ("url", "name"), @@ -335,3 +341,26 @@ def test_scale_rectangle_indirect_object(): for page in reader.pages: page.scale(sx=2, sy=3) + + +def test_merge_output(): + # Arrange + base = os.path.join(RESOURCE_ROOT, "Seige_of_Vicksburg_Sample_OCR.pdf") + crazy = os.path.join(RESOURCE_ROOT, "crazyones.pdf") + expected = os.path.join( + RESOURCE_ROOT, "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf" + ) + + # Act + merger = PdfMerger(strict=True) + with pytest.warns(PdfReadWarning): + merger.append(base) + merger.merge(1, crazy) + stream = BytesIO() + merger.write(stream) + + # Assert + stream.seek(0) + with open(expected, "rb") as fp: + expected_data = fp.read() + assert stream.read() == expected_data