def _is_int_in_range(value, low, high):
    """Return True when *value* is a real int (not a bool) within [low, high]."""
    # bool is a subclass of int: without this exclusion ``True`` would pass
    # as 1 and later be serialized as the text "True".
    if isinstance(value, bool) or not isinstance(value, int):
        return False
    return low <= value <= high


def _insert_after(parent, anchor, tag, text):
    """Create ``<tag>text</tag>`` and insert it in *parent* right after *anchor*."""
    # Imported here so the function does not rely on a module-level import
    # and parameter validation stays usable before lxml is needed.
    from lxml import etree

    new_elem = etree.Element(tag)
    new_elem.text = text
    parent.insert(parent.index(anchor) + 1, new_elem)
    return new_elem


def complete_pub_date(xmltree, default_day=15, default_month=6):
    """
    Complete partial ``<pub-date>`` elements with default day and month.

    Only ``<pub-date>`` elements that have ``pub-type="pub"`` or
    ``publication-format="electronic"`` and that contain a ``<year>`` child
    are processed. A missing ``<month>`` is inserted right after ``<year>``;
    a missing ``<day>`` is inserted right after ``<month>``. Existing
    ``<month>``/``<day>`` elements are never modified. The tree is changed
    in place.

    Note: the default day is not cross-checked against the month, so a
    combination such as day 31 with month 4 is written as given.

    Args:
        xmltree: lxml element (or tree) to process.
        default_day (int, optional): Day written when ``<day>`` is absent
            (1-31). Defaults to 15.
        default_month (int, optional): Month written when ``<month>`` is
            absent (1-12). Defaults to 6.

    Returns:
        list: One dict per inserted element, in insertion order, with keys:
            - xpath (str): XPath of the modified ``<pub-date>``
            - element_added (str): ``'month'`` or ``'day'``
            - value (str): text written into the new element

    Raises:
        ValueError: If ``default_day`` is not an int in 1-31, if
            ``default_month`` is not an int in 1-12 (booleans are rejected),
            or if ``xmltree`` is None.

    Examples:
        >>> from lxml import etree
        >>> xml = (
        ...     '<article><front><article-meta>'
        ...     '<pub-date pub-type="pub"><year>2024</year></pub-date>'
        ...     '</article-meta></front></article>'
        ... )
        >>> tree = etree.fromstring(xml)
        >>> changes = complete_pub_date(tree)
        >>> [change['element_added'] for change in changes]
        ['month', 'day']
        >>> pub_date = tree.find('.//pub-date')
        >>> pub_date.findtext('month'), pub_date.findtext('day')
        ('6', '15')
        >>> [child.tag for child in pub_date]
        ['year', 'month', 'day']

        >>> # A second run finds nothing left to complete:
        >>> complete_pub_date(tree)
        []
    """
    # Validate the defaults first so bad arguments fail before touching the tree.
    if not _is_int_in_range(default_day, 1, 31):
        raise ValueError("default_day must be between 1 and 31")

    if not _is_int_in_range(default_month, 1, 12):
        raise ValueError("default_month must be between 1 and 12")

    if xmltree is None:
        raise ValueError("xmltree cannot be None")

    day_text = str(default_day)
    month_text = str(default_month)
    changes = []

    # The XPath union de-duplicates nodes matching both predicates and
    # yields them in document order.
    pub_date_nodes = xmltree.xpath(
        ".//pub-date[@pub-type='pub'] | "
        ".//pub-date[@publication-format='electronic']"
    )

    for pub_date_node in pub_date_nodes:
        # <year> is the anchor for insertion; without it the date is skipped.
        year_elem = pub_date_node.find('year')
        if year_elem is None:
            continue

        xpath = pub_date_node.getroottree().getpath(pub_date_node)

        month_elem = pub_date_node.find('month')
        if month_elem is None:
            month_elem = _insert_after(
                pub_date_node, year_elem, 'month', month_text
            )
            changes.append({
                'xpath': xpath,
                'element_added': 'month',
                'value': month_text,
            })

        if pub_date_node.find('day') is None:
            # month_elem is either the pre-existing month or the one just added.
            _insert_after(pub_date_node, month_elem, 'day', day_text)
            changes.append({
                'xpath': xpath,
                'element_added': 'day',
                'value': day_text,
            })

    return changes
import unittest

from lxml import etree

from packtools.sps.utils.xml_fixer import complete_pub_date


# NOTE(review): the XML fixtures below were rebuilt from what the assertions
# and the XPath in complete_pub_date require; attribute values not implied by
# an assertion (e.g. pub-type="collection", date-type="pub") are assumptions
# to confirm against the original test data.
def _article(*pub_dates):
    """Parse a minimal article whose article-meta holds the given pub-date snippets."""
    xml = (
        "<article><front><article-meta>"
        + "".join(pub_dates)
        + "</article-meta></front></article>"
    )
    return etree.fromstring(xml)


YEAR_ONLY_PUB = '<pub-date pub-type="pub"><year>2024</year></pub-date>'


class TestCompletePubDate(unittest.TestCase):
    """Test suite for complete_pub_date function."""

    def test_complete_pub_date_only_year(self):
        """Test completing pub-date with only year element."""
        tree = _article(YEAR_ONLY_PUB)
        changes = complete_pub_date(tree)

        # Should add both month and day
        self.assertEqual(len(changes), 2)
        self.assertEqual(changes[0]['element_added'], 'month')
        self.assertEqual(changes[0]['value'], '6')
        self.assertEqual(changes[1]['element_added'], 'day')
        self.assertEqual(changes[1]['value'], '15')

        # Verify XML structure
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('year'), '2024')
        self.assertEqual(pub_date.findtext('month'), '6')
        self.assertEqual(pub_date.findtext('day'), '15')

        # Verify order: year, month, day
        elements = [elem.tag for elem in pub_date]
        self.assertEqual(elements, ['year', 'month', 'day'])

    def test_complete_pub_date_year_and_month(self):
        """Test completing pub-date with year and month elements."""
        tree = _article(
            '<pub-date pub-type="pub">'
            '<year>2024</year><month>3</month>'
            '</pub-date>'
        )
        changes = complete_pub_date(tree)

        # Should add only day
        self.assertEqual(len(changes), 1)
        self.assertEqual(changes[0]['element_added'], 'day')
        self.assertEqual(changes[0]['value'], '15')

        # Verify XML structure
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('year'), '2024')
        self.assertEqual(pub_date.findtext('month'), '3')
        self.assertEqual(pub_date.findtext('day'), '15')

        # Verify order
        elements = [elem.tag for elem in pub_date]
        self.assertEqual(elements, ['year', 'month', 'day'])

    def test_complete_pub_date_already_complete(self):
        """Test that complete pub-date is not modified."""
        tree = _article(
            '<pub-date pub-type="pub">'
            '<year>2024</year><month>3</month><day>20</day>'
            '</pub-date>'
        )
        changes = complete_pub_date(tree)

        # Should not add anything
        self.assertEqual(len(changes), 0)

        # Verify XML structure unchanged
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('year'), '2024')
        self.assertEqual(pub_date.findtext('month'), '3')
        self.assertEqual(pub_date.findtext('day'), '20')

    def test_complete_pub_date_with_publication_format_electronic(self):
        """Test completing pub-date with publication-format='electronic'."""
        tree = _article(
            '<pub-date date-type="pub" publication-format="electronic">'
            '<year>2024</year>'
            '</pub-date>'
        )
        changes = complete_pub_date(tree)

        # Should add both month and day
        self.assertEqual(len(changes), 2)
        self.assertEqual(changes[0]['element_added'], 'month')
        self.assertEqual(changes[1]['element_added'], 'day')

        # Verify XML structure
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('month'), '6')
        self.assertEqual(pub_date.findtext('day'), '15')

    def test_complete_pub_date_ignores_other_pub_types(self):
        """Test that pub-date with other pub-types are ignored."""
        tree = _article(
            '<pub-date pub-type="collection"><year>2024</year></pub-date>'
        )
        changes = complete_pub_date(tree)

        # Should not add anything
        self.assertEqual(len(changes), 0)

        # Verify pub-date is unchanged
        pub_date = tree.find('.//pub-date')
        self.assertIsNone(pub_date.find('month'))
        self.assertIsNone(pub_date.find('day'))

    def test_complete_pub_date_custom_defaults(self):
        """Test completing pub-date with custom default values."""
        tree = _article(YEAR_ONLY_PUB)
        changes = complete_pub_date(tree, default_day=1, default_month=1)

        # Should add both month and day with custom values
        self.assertEqual(len(changes), 2)
        self.assertEqual(changes[0]['value'], '1')
        self.assertEqual(changes[1]['value'], '1')

        # Verify XML structure
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('month'), '1')
        self.assertEqual(pub_date.findtext('day'), '1')

    def test_complete_pub_date_invalid_day(self):
        """Test that invalid default_day raises ValueError."""
        tree = _article(YEAR_ONLY_PUB)

        # Test day < 1
        with self.assertRaises(ValueError) as context:
            complete_pub_date(tree, default_day=0, default_month=6)
        self.assertIn(
            "default_day must be between 1 and 31", str(context.exception)
        )

        # Test day > 31
        with self.assertRaises(ValueError) as context:
            complete_pub_date(tree, default_day=32, default_month=6)
        self.assertIn(
            "default_day must be between 1 and 31", str(context.exception)
        )

    def test_complete_pub_date_invalid_month(self):
        """Test that invalid default_month raises ValueError."""
        tree = _article(YEAR_ONLY_PUB)

        # Test month < 1
        with self.assertRaises(ValueError) as context:
            complete_pub_date(tree, default_day=15, default_month=0)
        self.assertIn(
            "default_month must be between 1 and 12", str(context.exception)
        )

        # Test month > 12
        with self.assertRaises(ValueError) as context:
            complete_pub_date(tree, default_day=15, default_month=13)
        self.assertIn(
            "default_month must be between 1 and 12", str(context.exception)
        )

    def test_complete_pub_date_multiple_pub_dates(self):
        """Test completing multiple pub-date elements."""
        tree = _article(
            YEAR_ONLY_PUB,
            '<pub-date publication-format="electronic">'
            '<year>2023</year><month>12</month>'
            '</pub-date>',
        )
        changes = complete_pub_date(tree)

        # Should add month and day to first, day to second
        self.assertEqual(len(changes), 3)

        # Verify first pub-date
        pub_dates = tree.findall('.//pub-date')
        self.assertEqual(pub_dates[0].findtext('month'), '6')
        self.assertEqual(pub_dates[0].findtext('day'), '15')

        # Verify second pub-date
        self.assertEqual(pub_dates[1].findtext('month'), '12')
        self.assertEqual(pub_dates[1].findtext('day'), '15')

    def test_complete_pub_date_no_year(self):
        """Test that pub-date without year is not processed."""
        tree = _article(
            '<pub-date pub-type="pub"><month>6</month></pub-date>'
        )
        changes = complete_pub_date(tree)

        # Should not process this pub-date
        self.assertEqual(len(changes), 0)

        # Verify pub-date is unchanged
        pub_date = tree.find('.//pub-date')
        self.assertIsNone(pub_date.find('year'))
        self.assertIsNone(pub_date.find('day'))

    def test_complete_pub_date_xpath_in_changes(self):
        """Test that changes include correct xpath."""
        tree = _article(YEAR_ONLY_PUB)
        changes = complete_pub_date(tree)

        # Verify xpath is present in changes
        self.assertIn('xpath', changes[0])
        self.assertIn('pub-date', changes[0]['xpath'])

    def test_complete_pub_date_preserves_other_elements(self):
        """Test that other elements in pub-date are preserved."""
        tree = _article(
            '<pub-date pub-type="pub">'
            '<year>2024</year><season>Spring</season>'
            '</pub-date>'
        )
        changes = complete_pub_date(tree)

        # Should add month and day
        self.assertEqual(len(changes), 2)

        # Verify season is preserved
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('season'), 'Spring')


if __name__ == '__main__':
    unittest.main()