"""
XML Fixer utilities for completing and fixing XML structures.

This module provides utilities to fix and complete XML structures,
particularly for SciELO Publishing Schema (SPS) compliance.
"""


def _is_int_between(value, lowest, highest):
    """Tell whether *value* is a genuine integer inside [lowest, highest]."""
    # bool is a subclass of int: True would pass a plain isinstance check and
    # later be serialized as the text "True", so it is refused explicitly.
    if isinstance(value, bool) or not isinstance(value, int):
        return False
    return lowest <= value <= highest


def complete_pub_date(xmltree, default_day=15, default_month=6):
    """
    Fill in missing <month> and <day> children of <pub-date> elements.

    Only <pub-date> elements carrying ``pub-type="pub"`` or
    ``publication-format="electronic"`` are considered, and only those that
    already have a <year> child. A missing <month> is inserted right after
    <year>; a missing <day> is inserted right after <month>. Children that
    already exist are never modified. The tree is changed in place.

    The defaults are validated individually; their combination is not
    checked against the calendar (day 31 with month 2 is accepted).

    Args:
        xmltree: lxml element (or tree) to process; it must support
            ``xpath()``.
        default_day (int, optional): Day written when <day> is missing
            (1-31). Defaults to 15.
        default_month (int, optional): Month written when <month> is missing
            (1-12). Defaults to 6.

    Returns:
        list: One dict per element added, in insertion order, with keys:
            - xpath (str): path of the modified <pub-date>, as given by
              ``getpath()`` of its root tree
            - element_added (str): ``'month'`` or ``'day'``
            - value (str): text written into the new element

    Raises:
        ValueError: If default_day is not an integer between 1 and 31 or
            default_month is not an integer between 1 and 12 (booleans are
            rejected).

    Examples:
        >>> from lxml import etree
        >>> xml = (
        ...     '<article><front><article-meta>'
        ...     '<pub-date pub-type="pub"><year>2024</year></pub-date>'
        ...     '</article-meta></front></article>'
        ... )
        >>> tree = etree.fromstring(xml)
        >>> changes = complete_pub_date(tree)
        >>> [change['element_added'] for change in changes]
        ['month', 'day']
        >>> pub_date = tree.find('.//pub-date')
        >>> pub_date.findtext('month')
        '6'
        >>> pub_date.findtext('day')
        '15'

        >>> # Custom defaults:
        >>> tree = etree.fromstring(xml)
        >>> changes = complete_pub_date(tree, default_day=1, default_month=1)
        >>> [change['value'] for change in changes]
        ['1', '1']

        >>> # A second run finds nothing left to add:
        >>> complete_pub_date(tree)
        []
    """
    # Validate before touching the tree so a bad call leaves it untouched.
    if not _is_int_between(default_day, 1, 31):
        raise ValueError("default_day must be between 1 and 31")

    if not _is_int_between(default_month, 1, 12):
        raise ValueError("default_month must be between 1 and 12")

    # Insertion order matters: month first, so day can be anchored to it.
    defaults = (
        ('month', str(default_month)),
        ('day', str(default_day)),
    )

    # The XPath union returns each matching node once, in document order,
    # even when a node satisfies both predicates.
    xpath_query = (
        ".//pub-date[@pub-type='pub'] | "
        ".//pub-date[@publication-format='electronic']"
    )

    changes = []

    for pub_date_node in xmltree.xpath(xpath_query):
        # Without <year> there is no anchor for the new children: skip.
        year_elem = pub_date_node.find('year')
        if year_elem is None:
            continue

        xpath = pub_date_node.getroottree().getpath(pub_date_node)

        anchor = year_elem
        for tag, text in defaults:
            child = pub_date_node.find(tag)
            if child is None:
                # makeelement() builds the child through the node itself, so
                # this function needs no direct reference to the lxml module.
                child = pub_date_node.makeelement(tag)
                child.text = text
                pub_date_node.insert(pub_date_node.index(anchor) + 1, child)
                changes.append({
                    'xpath': xpath,
                    'element_added': tag,
                    'value': text,
                })
            # Existing or new, this child anchors the next insertion.
            anchor = child

    return changes
# coding: utf-8
import unittest

from lxml import etree

from packtools.sps.utils.xml_fixer import complete_pub_date


# NOTE(review): the XML fixture literals of this module were reconstructed
# from their surviving text nodes (years, months, days, season) and from the
# assertions below; the attribute values are assumptions - confirm them
# against the original commit.
_PUB = 'pub-type="pub"'
_ELECTRONIC = 'date-type="pub" publication-format="electronic"'
_COLLECTION = 'pub-type="collection"'

_YEAR_ONLY = "<year>2024</year>"


def _pub_date(children, attributes=_PUB):
    """Return the markup of one <pub-date> with the given attributes/children."""
    return "<pub-date {}>{}</pub-date>".format(attributes, children)


def _article(*pub_dates):
    """Parse an <article> whose <article-meta> holds the given pub-date markup."""
    return etree.fromstring(
        "<article><front><article-meta>{}</article-meta></front></article>".format(
            "".join(pub_dates)
        )
    )


class TestCompletePubDate(unittest.TestCase):
    """Test suite for complete_pub_date function."""

    def test_complete_pub_date_only_year(self):
        """Test completing pub-date with only year element."""
        tree = _article(_pub_date(_YEAR_ONLY))
        changes = complete_pub_date(tree)

        # Should add both month and day
        self.assertEqual(len(changes), 2)
        self.assertEqual(changes[0]['element_added'], 'month')
        self.assertEqual(changes[0]['value'], '6')
        self.assertEqual(changes[1]['element_added'], 'day')
        self.assertEqual(changes[1]['value'], '15')

        # Verify XML structure
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('year'), '2024')
        self.assertEqual(pub_date.findtext('month'), '6')
        self.assertEqual(pub_date.findtext('day'), '15')

        # Verify order: year, month, day
        elements = [elem.tag for elem in pub_date]
        self.assertEqual(elements, ['year', 'month', 'day'])

    def test_complete_pub_date_year_and_month(self):
        """Test completing pub-date with year and month elements."""
        tree = _article(_pub_date("<year>2024</year><month>3</month>"))
        changes = complete_pub_date(tree)

        # Should add only day
        self.assertEqual(len(changes), 1)
        self.assertEqual(changes[0]['element_added'], 'day')
        self.assertEqual(changes[0]['value'], '15')

        # Verify XML structure
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('year'), '2024')
        self.assertEqual(pub_date.findtext('month'), '3')
        self.assertEqual(pub_date.findtext('day'), '15')

        # Verify order
        elements = [elem.tag for elem in pub_date]
        self.assertEqual(elements, ['year', 'month', 'day'])

    def test_complete_pub_date_already_complete(self):
        """Test that complete pub-date is not modified."""
        tree = _article(
            _pub_date("<year>2024</year><month>3</month><day>20</day>")
        )
        changes = complete_pub_date(tree)

        # Should not add anything
        self.assertEqual(len(changes), 0)

        # Verify XML structure unchanged
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('year'), '2024')
        self.assertEqual(pub_date.findtext('month'), '3')
        self.assertEqual(pub_date.findtext('day'), '20')

    def test_complete_pub_date_with_publication_format_electronic(self):
        """Test completing pub-date with publication-format='electronic'."""
        tree = _article(_pub_date(_YEAR_ONLY, _ELECTRONIC))
        changes = complete_pub_date(tree)

        # Should add both month and day
        self.assertEqual(len(changes), 2)
        self.assertEqual(changes[0]['element_added'], 'month')
        self.assertEqual(changes[1]['element_added'], 'day')

        # Verify XML structure
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('month'), '6')
        self.assertEqual(pub_date.findtext('day'), '15')

    def test_complete_pub_date_ignores_other_pub_types(self):
        """Test that pub-date with other pub-types are ignored."""
        tree = _article(_pub_date(_YEAR_ONLY, _COLLECTION))
        changes = complete_pub_date(tree)

        # Should not add anything
        self.assertEqual(len(changes), 0)

        # Verify pub-date is unchanged
        pub_date = tree.find('.//pub-date')
        self.assertIsNone(pub_date.find('month'))
        self.assertIsNone(pub_date.find('day'))

    def test_complete_pub_date_custom_defaults(self):
        """Test completing pub-date with custom default values."""
        tree = _article(_pub_date(_YEAR_ONLY))
        changes = complete_pub_date(tree, default_day=1, default_month=1)

        # Should add both month and day with custom values
        self.assertEqual(len(changes), 2)
        self.assertEqual(changes[0]['value'], '1')
        self.assertEqual(changes[1]['value'], '1')

        # Verify XML structure
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('month'), '1')
        self.assertEqual(pub_date.findtext('day'), '1')

    def test_complete_pub_date_invalid_day(self):
        """Test that invalid default_day raises ValueError."""
        tree = _article(_pub_date(_YEAR_ONLY))

        # Test day < 1
        with self.assertRaises(ValueError) as context:
            complete_pub_date(tree, default_day=0, default_month=6)
        self.assertIn("default_day must be between 1 and 31", str(context.exception))

        # Test day > 31
        with self.assertRaises(ValueError) as context:
            complete_pub_date(tree, default_day=32, default_month=6)
        self.assertIn("default_day must be between 1 and 31", str(context.exception))

    def test_complete_pub_date_invalid_month(self):
        """Test that invalid default_month raises ValueError."""
        tree = _article(_pub_date(_YEAR_ONLY))

        # Test month < 1
        with self.assertRaises(ValueError) as context:
            complete_pub_date(tree, default_day=15, default_month=0)
        self.assertIn("default_month must be between 1 and 12", str(context.exception))

        # Test month > 12
        with self.assertRaises(ValueError) as context:
            complete_pub_date(tree, default_day=15, default_month=13)
        self.assertIn("default_month must be between 1 and 12", str(context.exception))

    def test_complete_pub_date_multiple_pub_dates(self):
        """Test completing multiple pub-date elements."""
        tree = _article(
            _pub_date(_YEAR_ONLY),
            _pub_date("<year>2023</year><month>12</month>", _ELECTRONIC),
        )
        changes = complete_pub_date(tree)

        # Should add month and day to first, day to second
        self.assertEqual(len(changes), 3)

        # Verify first pub-date
        pub_dates = tree.findall('.//pub-date')
        self.assertEqual(pub_dates[0].findtext('month'), '6')
        self.assertEqual(pub_dates[0].findtext('day'), '15')

        # Verify second pub-date
        self.assertEqual(pub_dates[1].findtext('month'), '12')
        self.assertEqual(pub_dates[1].findtext('day'), '15')

    def test_complete_pub_date_no_year(self):
        """Test that pub-date without year is not processed."""
        tree = _article(_pub_date("<month>6</month>"))
        changes = complete_pub_date(tree)

        # Should not process this pub-date
        self.assertEqual(len(changes), 0)

        # Verify pub-date is unchanged
        pub_date = tree.find('.//pub-date')
        self.assertIsNone(pub_date.find('year'))
        self.assertIsNone(pub_date.find('day'))

    def test_complete_pub_date_xpath_in_changes(self):
        """Test that changes include correct xpath."""
        tree = _article(_pub_date(_YEAR_ONLY))
        changes = complete_pub_date(tree)

        # Verify xpath is present in changes
        self.assertIn('xpath', changes[0])
        self.assertIn('pub-date', changes[0]['xpath'])

    def test_complete_pub_date_preserves_other_elements(self):
        """Test that other elements in pub-date are preserved."""
        tree = _article(
            _pub_date("<year>2024</year><season>Spring</season>")
        )
        changes = complete_pub_date(tree)

        # Should add month and day
        self.assertEqual(len(changes), 2)

        # Verify season is preserved
        pub_date = tree.find('.//pub-date')
        self.assertEqual(pub_date.findtext('season'), 'Spring')


if __name__ == '__main__':
    unittest.main()