From 90e9d8741259993174a620a3c641a4ee3bfedaff Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 21 May 2025 14:54:16 +0100 Subject: [PATCH 1/9] MAINT: Increase readability of _merge_page Mainly moving declarations nearer their use. --- pypdf/_page.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 45690a5922..eb8f32477b 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1122,9 +1122,6 @@ def _merge_page( over: bool = True, expand: bool = False, ) -> None: - # First we work on merging the resource dictionaries. This allows us - # to find out what symbols in the content streams we might need to - # rename. try: assert isinstance(self.indirect_reference, IndirectObject) if hasattr( @@ -1136,8 +1133,9 @@ def _merge_page( except (AssertionError, AttributeError): pass - new_resources = DictionaryObject() - rename = {} + # First we work on merging the resource dictionaries. This allows us + # to find out what symbols in the content streams we might need to + # rename. try: original_resources = cast(DictionaryObject, self[PG.RESOURCES].get_object()) except KeyError: @@ -1146,14 +1144,8 @@ def _merge_page( page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) except KeyError: page2resources = DictionaryObject() - new_annots = ArrayObject() - - for page in (self, page2): - if PG.ANNOTS in page: - annots = page[PG.ANNOTS] - if isinstance(annots, ArrayObject): - new_annots.extend(annots) + rename = {} for res in ( RES.EXT_G_STATE, RES.FONT, @@ -1170,7 +1162,8 @@ def _merge_page( new_resources[NameObject(res)] = new rename.update(newrename) - # Combine /ProcSet sets, making sure there's a consistent order + # Combine /ProcSet sets, making sure there is a consistent order + new_resources = DictionaryObject() new_resources[NameObject(RES.PROC_SET)] = ArrayObject( sorted( set( @@ -1222,8 +1215,15 @@ def _merge_page( if expand: self._expand_mediabox(page2, ctm) - self.replace_contents(ContentStream(new_content_array, self.pdf)) self[NameObject(PG.RESOURCES)] = new_resources + self.replace_contents(ContentStream(new_content_array, self.pdf)) + + new_annots = ArrayObject() + for page in (self, page2): + if PG.ANNOTS in page: + annots = page[PG.ANNOTS] + if isinstance(annots, ArrayObject): + new_annots.extend(annots) self[NameObject(PG.ANNOTS)] = new_annots def _merge_page_writer( @@ -1240,7 +1240,6 @@ def _merge_page_writer( assert isinstance(self.indirect_reference, IndirectObject) pdf = self.indirect_reference.pdf - rename = {} if PG.RESOURCES not in self: self[NameObject(PG.RESOURCES)] = DictionaryObject() original_resources = cast(DictionaryObject, self[PG.RESOURCES].get_object()) @@ -1249,6 +1248,7 @@ def _merge_page_writer( else: page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) + rename = {} for res in ( RES.EXT_G_STATE, RES.FONT, @@ -1265,7 +1265,7 @@ def _merge_page_writer( original_resources, page2resources, res, False ) rename.update(newrename) - # Combine /ProcSet sets. + # Combine /ProcSet sets if RES.PROC_SET in page2resources: if RES.PROC_SET not in original_resources: original_resources[NameObject(RES.PROC_SET)] = ArrayObject() From ad6e02ad8107e5a08a6125884666581636d9835a Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 21 May 2025 14:59:05 +0100 Subject: [PATCH 2/9] MAINT: Increase readability of _merge_page Mainly moving declarations nearer their use. --- pypdf/_page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index eb8f32477b..5fc64a79ec 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1145,6 +1145,7 @@ def _merge_page( except KeyError: page2resources = DictionaryObject() + new_resources = DictionaryObject() rename = {} for res in ( RES.EXT_G_STATE, @@ -1163,7 +1164,6 @@ def _merge_page( rename.update(newrename) # Combine /ProcSet sets, making sure there is a consistent order - new_resources = DictionaryObject() new_resources[NameObject(RES.PROC_SET)] = ArrayObject( sorted( set( From 7387f0bd6bcd71ce6902668b1cfb4919f992da3e Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 21 May 2025 20:59:45 +0100 Subject: [PATCH 3/9] MAINT: Increase readability of _merge_page --- pypdf/_page.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 5fc64a79ec..c84798d862 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1141,9 +1141,9 @@ def _merge_page( except KeyError: original_resources = DictionaryObject() try: - page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) + page2_resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) except KeyError: - page2resources = DictionaryObject() + page2_resources = DictionaryObject() new_resources = DictionaryObject() rename = {} @@ -1156,12 +1156,12 @@ def _merge_page( RES.SHADING, RES.PROPERTIES, ): - new, newrename = self._merge_resources( - original_resources, page2resources, res + new, new_rename = self._merge_resources( + original_resources, page2_resources, res ) if new: new_resources[NameObject(res)] = new - rename.update(newrename) + rename.update(new_rename) # Combine /ProcSet sets, making sure there is a consistent order new_resources[NameObject(RES.PROC_SET)] = ArrayObject( @@ -1169,7 +1169,7 @@ def _merge_page( set( original_resources.get(RES.PROC_SET, ArrayObject()).get_object() ).union( - set(page2resources.get(RES.PROC_SET, ArrayObject()).get_object()) + set(page2_resources.get(RES.PROC_SET, ArrayObject()).get_object()) ) ) ) @@ -1180,10 +1180,10 @@ def _merge_page( original_content.isolate_graphics_state() new_content_array.append(original_content) - page2content = page2.get_contents() - if page2content is not None: + page2_content = page2.get_contents() + if page2_content is not None: rect = getattr(page2, MERGE_CROP_BOX) - page2content.operations.insert( + page2_content.operations.insert( 0, ( map( @@ -1198,18 +1198,18 @@ def _merge_page( b"re", ), ) - page2content.operations.insert(1, ([], b"W")) - page2content.operations.insert(2, ([], b"n")) + page2_content.operations.insert(1, ([], b"W")) + page2_content.operations.insert(2, ([], b"n")) if page2transformation is not None: - page2content = page2transformation(page2content) - page2content = PageObject._content_stream_rename( - page2content, rename, self.pdf + page2_content = page2transformation(page2_content) + page2_content = PageObject._content_stream_rename( + page2_content, rename, self.pdf ) - page2content.isolate_graphics_state() + page2_content.isolate_graphics_state() if over: - new_content_array.append(page2content) + new_content_array.append(page2_content) else: - new_content_array.insert(0, page2content) + new_content_array.insert(0, page2_content) # if expanding the page to fit a new page, calculate the new media box size if expand: From 17afa32571b7a50db188854843db8532efb9288b Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Thu, 22 May 2025 08:52:42 +0100 Subject: [PATCH 4/9] MAINT: Increase readability of _merge_page --- pypdf/_page.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index c84798d862..2d0a1dd6a0 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1149,11 +1149,11 @@ def _merge_page( rename = {} for res in ( RES.EXT_G_STATE, - RES.FONT, - RES.XOBJECT, RES.COLOR_SPACE, RES.PATTERN, RES.SHADING, + RES.XOBJECT, + RES.FONT, RES.PROPERTIES, ): new, new_rename = self._merge_resources( @@ -1251,11 +1251,11 @@ def _merge_page_writer( rename = {} for res in ( RES.EXT_G_STATE, - RES.FONT, - RES.XOBJECT, RES.COLOR_SPACE, RES.PATTERN, RES.SHADING, + RES.XOBJECT, + RES.FONT, RES.PROPERTIES, ): if res in page2resources: From 445d96562a90d545addc950b12a22c74977c74a1 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:27:23 +0000 Subject: [PATCH 5/9] Increase code coverage --- tests/test_page.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/test_page.py b/tests/test_page.py index f7f1b9430a..9fd5e4bb14 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -203,14 +203,14 @@ def test_transformation_equivalence2(): writer.pages[0].merge_transformed_page( reader_add.pages[0], Transformation().scale(2).translate(100, 100), True, False ) - # No special assert: the test should be visual in a viewer; 2 box with a arrow rotated and translated + # No special assert: the test should be visual in a viewer; 2 box with a arrow rotated and translated writer = PdfWriter() writer.append(reader_add) writer.pages[0].merge_transformed_page( reader_base.pages[0], Transformation(), True, True ) - # No special assert: Visual check the page has been increased and all is visible (box+graph) + # No special assert: Visual check the page has been increased and all is visible (box+graph) writer = PdfWriter() writer.append(reader_add) @@ -221,7 +221,7 @@ def test_transformation_equivalence2(): False, False, ) - # No special assert: Visual check the page has been increased and all is visible (box+graph) + # No special assert: Visual check the page has been increased and all is visible (box+graph) pdf_path = RESOURCE_ROOT / "commented-xmp.pdf" reader_comments = PdfReader(pdf_path) @@ -426,7 +426,7 @@ def test_iss_1142(): ( "https://github.com/py-pdf/pypdf/files/9428434/TelemetryTX_EM.pdf", "tika-964029.pdf", - ), # no_ressources + ), # no_resources ( # https://www.itu.int/rec/T-REC-X.25-199610-I/en "https://github.com/py-pdf/pypdf/files/12423313/T-REC-X.25-199610-I.PDF-E.pdf", @@ -878,7 +878,7 @@ def test_annotation_getter(): def test_annotation_setter(pdf_file_path): - # Arange + # Arrange pdf_path = RESOURCE_ROOT / "crazyones.pdf" reader = PdfReader(pdf_path) page = reader.pages[0] @@ -1008,6 +1008,14 @@ def test_no_resources(): page_one.merge_page(page_two) +def test_multiple_annotations(): + pdf_path = RESOURCE_ROOT / "crazyones.pdf" + reader = PdfReader(pdf_path) + page_one = reader.pages[0] + page_two = reader.pages[0] + page_one.merge_page(page_two) + + def test_merge_page_reproducible_with_proc_set(): page1 = PageObject.create_blank_page(width=100, height=100) page2 = PageObject.create_blank_page(width=100, height=100) From c0384d551976cac7b6db063fe4b1789e2798ed43 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 17 Sep 2025 15:57:47 +0100 Subject: [PATCH 6/9] Rename test --- tests/test_page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_page.py b/tests/test_page.py index 9fd5e4bb14..622afa0c24 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -1008,7 +1008,7 @@ def test_no_resources(): page_one.merge_page(page_two) -def test_multiple_annotations(): +def test_merge_page_coverage(): pdf_path = RESOURCE_ROOT / "crazyones.pdf" reader = PdfReader(pdf_path) page_one = reader.pages[0] From 3b8ef7e662d695fd7e89f372be57342d6c6c3150 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 17 Sep 2025 15:18:13 +0000 Subject: [PATCH 7/9] Increase code coverage --- tests/test_page.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/test_page.py b/tests/test_page.py index 622afa0c24..51c6c9ccfc 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -13,6 +13,7 @@ from pypdf import PdfReader, PdfWriter, Transformation from pypdf._page import PageObject +from pypdf.annotations import Polygon from pypdf.constants import PageAttributes as PG from pypdf.errors import PdfReadError, PdfReadWarning, PyPdfError from pypdf.generic import ( @@ -1008,11 +1009,24 @@ def test_no_resources(): page_one.merge_page(page_two) -def test_merge_page_coverage(): +def test_merge_page_multiple_annotations(): pdf_path = RESOURCE_ROOT / "crazyones.pdf" reader = PdfReader(pdf_path) - page_one = reader.pages[0] - page_two = reader.pages[0] + page = reader.pages[0] + writer = PdfWriter() + writer.add_page(page) + + annotation_1 = Polygon( + vertices=[(50, 550), (200, 650), (70, 750), (50, 700)], + ) + annotation_2 = Polygon( + vertices=[(40, 540), (200, 640), (70, 740), (50, 700)], + ) + writer.add_annotation(0, annotation_1) + writer.add_annotation(0, annotation_2) + + page_one = writer.pages[0] + page_two = writer.pages[0] page_one.merge_page(page_two) From bde53cac6a35c4ee25fca6cb57f716422ce565f2 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 17 Sep 2025 15:21:19 +0000 Subject: [PATCH 8/9] Remove whitespace from blank line --- tests/test_page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_page.py b/tests/test_page.py index 51c6c9ccfc..c706cd0a07 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -1024,7 +1024,7 @@ def test_merge_page_multiple_annotations(): ) writer.add_annotation(0, annotation_1) writer.add_annotation(0, annotation_2) - + page_one = writer.pages[0] page_two = writer.pages[0] page_one.merge_page(page_two) From 73d796e22eba9a283a002d1cf587a5a2d5a793c8 Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 17 Sep 2025 15:36:56 +0000 Subject: [PATCH 9/9] Try to fix error --- tests/test_page.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_page.py b/tests/test_page.py index c706cd0a07..62c5989f4a 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -1009,7 +1009,7 @@ def test_no_resources(): page_one.merge_page(page_two) -def test_merge_page_multiple_annotations(): +def test_merge_page_with_multiple_annotations(): pdf_path = RESOURCE_ROOT / "crazyones.pdf" reader = PdfReader(pdf_path) page = reader.pages[0] @@ -1019,10 +1019,10 @@ def test_merge_page_multiple_annotations(): annotation_1 = Polygon( vertices=[(50, 550), (200, 650), (70, 750), (50, 700)], ) + writer.add_annotation(0, annotation_1) annotation_2 = Polygon( vertices=[(40, 540), (200, 640), (70, 740), (50, 700)], ) - writer.add_annotation(0, annotation_1) writer.add_annotation(0, annotation_2) page_one = writer.pages[0]