diff --git a/pypdf/_page.py b/pypdf/_page.py index e7b47882c..7e1a1ce14 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1069,9 +1069,6 @@ def _merge_page( over: bool = True, expand: bool = False, ) -> None: - # First we work on merging the resource dictionaries. This allows us - # to find out what symbols in the content streams we might need to - # rename. try: assert isinstance(self.indirect_reference, IndirectObject) if hasattr( @@ -1083,47 +1080,43 @@ def _merge_page( except (AssertionError, AttributeError): pass - new_resources = DictionaryObject() - rename = {} + # First we work on merging the resource dictionaries. This allows us + # to find out what symbols in the content streams we might need to + # rename. try: original_resources = cast(DictionaryObject, self[PG.RESOURCES].get_object()) except KeyError: original_resources = DictionaryObject() try: - page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) + page2_resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) except KeyError: - page2resources = DictionaryObject() - new_annots = ArrayObject() - - for page in (self, page2): - if PG.ANNOTS in page: - annots = page[PG.ANNOTS] - if isinstance(annots, ArrayObject): - new_annots.extend(annots) + page2_resources = DictionaryObject() + new_resources = DictionaryObject() + rename = {} for res in ( RES.EXT_G_STATE, - RES.FONT, - RES.XOBJECT, RES.COLOR_SPACE, RES.PATTERN, RES.SHADING, + RES.XOBJECT, + RES.FONT, RES.PROPERTIES, ): - new, newrename = self._merge_resources( - original_resources, page2resources, res + new, new_rename = self._merge_resources( + original_resources, page2_resources, res ) if new: new_resources[NameObject(res)] = new - rename.update(newrename) + rename.update(new_rename) - # Combine /ProcSet sets, making sure there's a consistent order + # Combine /ProcSet sets, making sure there is a consistent order new_resources[NameObject(RES.PROC_SET)] = ArrayObject( sorted( set( original_resources.get(RES.PROC_SET, ArrayObject()).get_object() ).union( - set(page2resources.get(RES.PROC_SET, ArrayObject()).get_object()) + set(page2_resources.get(RES.PROC_SET, ArrayObject()).get_object()) ) ) ) @@ -1134,10 +1127,10 @@ def _merge_page( original_content.isolate_graphics_state() new_content_array.append(original_content) - page2content = page2.get_contents() - if page2content is not None: + page2_content = page2.get_contents() + if page2_content is not None: rect = getattr(page2, MERGE_CROP_BOX) - page2content.operations.insert( + page2_content.operations.insert( 0, ( map( @@ -1152,25 +1145,32 @@ def _merge_page( b"re", ), ) - page2content.operations.insert(1, ([], b"W")) - page2content.operations.insert(2, ([], b"n")) + page2_content.operations.insert(1, ([], b"W")) + page2_content.operations.insert(2, ([], b"n")) if page2transformation is not None: - page2content = page2transformation(page2content) - page2content = PageObject._content_stream_rename( - page2content, rename, self.pdf + page2_content = page2transformation(page2_content) + page2_content = PageObject._content_stream_rename( + page2_content, rename, self.pdf ) - page2content.isolate_graphics_state() + page2_content.isolate_graphics_state() if over: - new_content_array.append(page2content) + new_content_array.append(page2_content) else: - new_content_array.insert(0, page2content) + new_content_array.insert(0, page2_content) # if expanding the page to fit a new page, calculate the new media box size if expand: self._expand_mediabox(page2, ctm) - self.replace_contents(ContentStream(new_content_array, self.pdf)) self[NameObject(PG.RESOURCES)] = new_resources + self.replace_contents(ContentStream(new_content_array, self.pdf)) + + new_annots = ArrayObject() + for page in (self, page2): + if PG.ANNOTS in page: + annots = page[PG.ANNOTS] + if isinstance(annots, ArrayObject): + new_annots.extend(annots) self[NameObject(PG.ANNOTS)] = new_annots def _merge_page_writer( @@ -1187,7 +1187,6 @@ def _merge_page_writer( assert isinstance(self.indirect_reference, IndirectObject) pdf = self.indirect_reference.pdf - rename = {} if PG.RESOURCES not in self: self[NameObject(PG.RESOURCES)] = DictionaryObject() original_resources = cast(DictionaryObject, self[PG.RESOURCES].get_object()) @@ -1196,13 +1195,14 @@ def _merge_page_writer( else: page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) + rename = {} for res in ( RES.EXT_G_STATE, - RES.FONT, - RES.XOBJECT, RES.COLOR_SPACE, RES.PATTERN, RES.SHADING, + RES.XOBJECT, + RES.FONT, RES.PROPERTIES, ): if res in page2resources: @@ -1212,7 +1212,7 @@ def _merge_page_writer( original_resources, page2resources, res, False ) rename.update(newrename) - # Combine /ProcSet sets. + # Combine /ProcSet sets if RES.PROC_SET in page2resources: if RES.PROC_SET not in original_resources: original_resources[NameObject(RES.PROC_SET)] = ArrayObject() diff --git a/tests/test_page.py b/tests/test_page.py index f7f1b9430..62c5989f4 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -13,6 +13,7 @@ from pypdf import PdfReader, PdfWriter, Transformation from pypdf._page import PageObject +from pypdf.annotations import Polygon from pypdf.constants import PageAttributes as PG from pypdf.errors import PdfReadError, PdfReadWarning, PyPdfError from pypdf.generic import ( @@ -203,14 +204,14 @@ def test_transformation_equivalence2(): writer.pages[0].merge_transformed_page( reader_add.pages[0], Transformation().scale(2).translate(100, 100), True, False ) - # No special assert: the test should be visual in a viewer; 2 box with a arrow rotated and translated + # No special assert: the test should be visual in a viewer; 2 box with a arrow rotated and translated writer = PdfWriter() writer.append(reader_add) writer.pages[0].merge_transformed_page( reader_base.pages[0], Transformation(), True, True ) - # No special assert: Visual check the page has been increased and all is visible (box+graph) + # No special assert: Visual check the page has been increased and all is visible (box+graph) writer = PdfWriter() writer.append(reader_add) @@ -221,7 +222,7 @@ def test_transformation_equivalence2(): False, False, ) - # No special assert: Visual check the page has been increased and all is visible (box+graph) + # No special assert: Visual check the page has been increased and all is visible (box+graph) pdf_path = RESOURCE_ROOT / "commented-xmp.pdf" reader_comments = PdfReader(pdf_path) @@ -426,7 +427,7 @@ def test_iss_1142(): ( "https://github.com/py-pdf/pypdf/files/9428434/TelemetryTX_EM.pdf", "tika-964029.pdf", - ), # no_ressources + ), # no_resources ( # https://www.itu.int/rec/T-REC-X.25-199610-I/en "https://github.com/py-pdf/pypdf/files/12423313/T-REC-X.25-199610-I.PDF-E.pdf", @@ -878,7 +879,7 @@ def test_annotation_getter(): def test_annotation_setter(pdf_file_path): - # Arange + # Arrange pdf_path = RESOURCE_ROOT / "crazyones.pdf" reader = PdfReader(pdf_path) page = reader.pages[0] @@ -1008,6 +1009,27 @@ def test_no_resources(): page_one.merge_page(page_two) +def test_merge_page_with_multiple_annotations(): + pdf_path = RESOURCE_ROOT / "crazyones.pdf" + reader = PdfReader(pdf_path) + page = reader.pages[0] + writer = PdfWriter() + writer.add_page(page) + + annotation_1 = Polygon( + vertices=[(50, 550), (200, 650), (70, 750), (50, 700)], + ) + writer.add_annotation(0, annotation_1) + annotation_2 = Polygon( + vertices=[(40, 540), (200, 640), (70, 740), (50, 700)], + ) + writer.add_annotation(0, annotation_2) + + page_one = writer.pages[0] + page_two = writer.pages[0] + page_one.merge_page(page_two) + + def test_merge_page_reproducible_with_proc_set(): page1 = PageObject.create_blank_page(width=100, height=100) page2 = PageObject.create_blank_page(width=100, height=100)