From 81e2f355326ff98ed4180b1fa540f845581984fe Mon Sep 17 00:00:00 2001
From: Aweinhof
Date: Fri, 12 Sep 2025 12:08:36 +0200
Subject: [PATCH 01/17] Added recursive, basic version of IOService parser

---
 src/sysdiagnose/parsers/ioservice.py | 147 +++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 src/sysdiagnose/parsers/ioservice.py

diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py
new file mode 100644
index 0000000..1c34f17
--- /dev/null
+++ b/src/sysdiagnose/parsers/ioservice.py
@@ -0,0 +1,147 @@
+#! /usr/bin/env python3
+
+import os
+import string
+from tokenize import String
+from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger, Event
+from datetime import datetime
+
+
+class DemoParser(BaseParserInterface):
+    description = "Demo parsers"
+    format = "json"  # by default json, use jsonl for event-based data
+    rollback_addr = None
+    line = None
+    open_file = None
+
+    def __init__(self, config: SysdiagnoseConfig, case_id: str):
+        super().__init__(__file__, config, case_id)
+
+    def get_log_files(self) -> list:
+        log_file = "ioreg/IOServiceTestData.txt"
+        return [os.path.join(self.case_data_subfolder, log_file)]
+
+    def execute(self) -> list | dict:
+        '''
+        this is the function that will be called
+        '''
+        result = []
+        log_files = self.get_log_files()
+        for log_file in log_files:
+            entry = {}
+            try:
+                timestamp = datetime.strptime('1980-01-01 12:34:56.001 +00:00', '%Y-%m-%d %H:%M:%S.%f %z')  # moment of interest
+                event = Event(
+                    datetime=timestamp,
+                    message=f"Demo event from {log_file}",  # String with an informative message of the event
+                    module=self.module_name,
+                    timestamp_desc='Demo timestamp',  # String explaining what type of timestamp it is for example file created
+                )
+
+                self.parse_file(log_file)
+
+                result.append(event.to_dict())
+                logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file})
+                logger.debug(f"Entry details {str(entry)}", extra={'entry': str(entry)})
+                if not entry:
+                    logger.warning("Empty entry.")
+
+            except Exception:
+                logger.exception("Got an exception !")
+
+        return result
+
+    def parse_file(self, file: string):
+        """ IOService file notes
+
+        # Regex for +-o starting at start of file -> 1213 results
+        (\s|\|)*\+-o
+
+        # Regex for ALL +-o - 1213 results
+        \+-o
+
+        So we know that the data doesn't contain the node identifier ('+-o')
+
+        """
+        print('===============================')
+        with open(file, 'r') as f:
+            self.open_file = f
+            self.recursive_fun()
+            self.open_file = None
+        print('===============================')
+
+    def get_line(self):
+        self.rollback_addr = self.open_file.tell()
+        self.line = self.open_file.readline().replace('\n', '')
+
+    def recursive_call(self):
+        self.open_file.seek(self.rollback_addr)
+        self.recursive_fun()
+
+    def check_start_node(self):
+        if '+-o' not in self.line:
+            logger.error('This is not normal. Recursive function called on random line.')
+            exit(1)
+
+    def not_empty_node_check(self):
+        if not self.rollback_addr:
+            logger.error("+-o in two consecutive lines, not supposed to be possible")
+            exit(1)
+
+    def iterate_children(self, depth):
+        while self.line and (self.line[depth] == '|' or self.line[depth: depth+3] == '+-o'):
+            if self.line[depth: depth+3] == '+-o':
+                self.recursive_call()
+
+            else:
+                self.get_line()
+
+    def fetch_node_data(self):
+        while '+-o' not in self.line:
+            if not self.line:
+                return False  # end of file
+
+            node_data = []  # array of lines, to be transformed in json
+            node_data.append(self.line)
+            self.get_line()
+
+        return True
+
+    def recursive_fun(self):
+        is_leaf = False
+        self.get_line()
+
+        # check if we're at the start of a node
+        self.check_start_node()
+
+        node_name = self.line.split("+-o")[1].strip()
+        print("Node : ", node_name)
+        depth = self.line.index('o')  # to identify the other nodes that have the same parent
+        self.get_line()
+
+        # check if its a leaf
+        if self.line[depth] != '|':
+            is_leaf = True
+
+        # Fetch the data of the node
+        if not self.fetch_node_data():
+            return  # EOF
+
+        # stop if we're a leaf
+        if is_leaf:
+            self.open_file.seek(self.rollback_addr)
+            return
+
+        # sanity check
+        self.not_empty_node_check()
+
+        # going back one line to retrieve the node title line
+        self.recursive_call()
+        self.get_line()
+
+        # Iterates over each child to call the current function
+        self.iterate_children(depth)
+
+
+

From 2a7131fc620fb5381771bc3ee2d467f2759d2f09 Mon Sep 17 00:00:00 2001
From: Aweinhof
Date: Fri, 12 Sep 2025 16:53:43 +0200
Subject: [PATCH 02/17] IOService parser now recreates the tree in a json format

---
 src/sysdiagnose/parsers/ioservice.py | 100 ++++++++++++++++-----------
 1 file changed, 58 insertions(+), 42 deletions(-)

diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py
index 1c34f17..2030051 100644
--- a/src/sysdiagnose/parsers/ioservice.py
+++ b/src/sysdiagnose/parsers/ioservice.py
@@ -1,18 +1,22 @@
 #! 
/usr/bin/env python3 +import array +from ctypes import Array import os import string from tokenize import String -from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger, Event +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger from datetime import datetime +import re -class DemoParser(BaseParserInterface): - description = "Demo parsers" - format = "json" # by default json, use jsonl for event-based data +class IOServiceParser(BaseParserInterface): + description = "IOService.txt file parser" + format = "json" rollback_addr = None line = None open_file = None + def __init__(self, config: SysdiagnoseConfig, case_id: str): super().__init__(__file__, config, case_id) @@ -22,36 +26,20 @@ def get_log_files(self) -> list: return [os.path.join(self.case_data_subfolder, log_file)] def execute(self) -> list | dict: - ''' - this is the function that will be called - ''' - result = [] log_files = self.get_log_files() + data_tree = {} + for log_file in log_files: - entry = {} try: - timestamp = datetime.strptime('1980-01-01 12:34:56.001 +00:00', '%Y-%m-%d %H:%M:%S.%f %z') # moment of interest - event = Event( - datetime=timestamp, - message=f"Demo event from {log_file}", # String with an informative message of the event - module=self.module_name, - timestamp_desc='Demo timestamp', # String explaining what type of timestamp it is for example file created - ) - - self.parse_file(log_file) - - result.append(event.to_dict()) logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) - logger.debug(f"Entry details {str(entry)}", extra={'entry': str(entry)}) - if not entry: - logger.warning("Empty entry.") + self.parse_file(log_file, data_tree) except Exception: logger.exception("Got an exception !") - return result + return data_tree - def parse_file(self, file: string): + def parse_file(self, file: str, data_tree: dict): """ IOService file notes # Regex for +-o starting at start of file -> 1213 results @@ -66,7 +54,7 @@ def parse_file(self, file: string): print('===============================') with open(file, 'r') as f: self.open_file = f - self.recursive_fun() + self.recursive_fun(data_tree) self.open_file = None print('===============================') @@ -74,9 +62,9 @@ def get_line(self): self.rollback_addr = self.open_file.tell() self.line = self.open_file.readline().replace('\n', '') - def recursive_call(self): + def recursive_call(self, data_tree: dict): self.open_file.seek(self.rollback_addr) - self.recursive_fun() + self.recursive_fun(data_tree) def check_start_node(self): if '+-o' not in self.line: @@ -88,26 +76,56 @@ def not_empty_node_check(self): logger.error("+-o in two consecutive lines, not supposed to be possible") exit(1) - def iterate_children(self, depth): + def iterate_children(self, depth: int, data_tree_list: list[dict]): while self.line and (self.line[depth] == '|' or self.line[depth: depth+3] == '+-o'): if self.line[depth: depth+3] == '+-o': - self.recursive_call() + data_tree_list.append({}) + self.recursive_call(data_tree_list[-1]) else: self.get_line() - def fetch_node_data(self): + def check_key_uniqueness(self, dictio, key): + if dictio.get(key): + logger.warning('Key is already in dictionary, data may be lost') + + def fetch_node_data(self, data_tree): + node_data = [] # array of lines, to be transformed in json + res = True + while '+-o' not in self.line: - if not self.line: - return False # end of file + if not self.line: # end of file + res = False + break - node_data = [] # array of 
lines, to be transformed in json node_data.append(self.line) self.get_line() - return True + data_tree['Data'] = self.node_data_to_json(node_data) + return res + + def node_data_to_json(self, data_array: list[str]) -> dict: + res = {} + for data in data_array: + # remove spaces and pipes at start + clean_line = re.sub('^(\s|\|)*', '', data) + + if '=' not in clean_line: + continue + + # split at the first equal only + key, value = clean_line.split('=', 1) + + # remove first and last " (in case the key has more quotes inside) + key = key.replace('"', '', 1) + key = key[::-1].replace('"', '', 1)[::-1] + + self.check_key_uniqueness(res, key) + res[key.strip()] = value.strip() + + return res - def recursive_fun(self): + def recursive_fun(self, data_tree: dict): is_leaf = False self.get_line() @@ -116,6 +134,8 @@ def recursive_fun(self): node_name = self.line.split("+-o")[1].strip() print("Node : ", node_name) + data_tree['Name'] = node_name + data_tree['Children'] = [] depth = self.line.index('o') # to identify the other nodes that have the same parent self.get_line() @@ -124,7 +144,7 @@ def recursive_fun(self): is_leaf = True # Fetch the data of the node - if not self.fetch_node_data(): + if not self.fetch_node_data(data_tree): return # EOF # stop if we're a leaf @@ -135,12 +155,8 @@ def recursive_fun(self): # sanity check self.not_empty_node_check() - # going back one line to retrieve the node title line - self.recursive_call() - self.get_line() - # Iterates over each child to call the current function - self.iterate_children(depth) + self.iterate_children(depth, data_tree['Children']) From 333cbdaa391fa7716ad55703d9c612bfef270005 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Fri, 12 Sep 2025 18:26:37 +0200 Subject: [PATCH 03/17] IOService parser now handles anomalies --- src/sysdiagnose/parsers/ioservice.py | 34 +++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py index 2030051..3319ed4 100644 --- a/src/sysdiagnose/parsers/ioservice.py +++ b/src/sysdiagnose/parsers/ioservice.py @@ -22,7 +22,7 @@ def __init__(self, config: SysdiagnoseConfig, case_id: str): super().__init__(__file__, config, case_id) def get_log_files(self) -> list: - log_file = "ioreg/IOServiceTestData.txt" + log_file = "ioreg/IOServiceTestData2.txt" return [os.path.join(self.case_data_subfolder, log_file)] def execute(self) -> list | dict: @@ -35,7 +35,7 @@ def execute(self) -> list | dict: self.parse_file(log_file, data_tree) except Exception: - logger.exception("Got an exception !") + logger.exception("IOService parsing crashed") return data_tree @@ -51,12 +51,10 @@ def parse_file(self, file: str, data_tree: dict): So we know that the data doesn't contain the node identifier ('+-o') """ - print('===============================') with open(file, 'r') as f: self.open_file = f self.recursive_fun(data_tree) self.open_file = None - print('===============================') def get_line(self): self.rollback_addr = self.open_file.tell() @@ -104,9 +102,33 @@ def fetch_node_data(self, data_tree): data_tree['Data'] = self.node_data_to_json(node_data) return res + def handle_anomalies(self, dictio, data, key): + """ + some values overflow on the few next lines + this condition assumes there is no '=' in the exceeding data + (which was the case up to what I saw) + + p.s. : if you wonder why cond4 is necessary, it is only for + the last leaf, which has no '|' symbols. 
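+        (the last leaf's data lines start with plain spaces instead of '|',
+        so cond1 alone cannot tell them apart from ordinary overflow lines)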
without cond4, + these lines would be seen as anomalies + """ + cond1 = not re.search('^\s*\|+', data) + cond2 = len(data.strip()) > 0 + cond3 = data.strip() not in ('{', '}') + cond4 = '=' not in data + + if cond1 and cond2 and cond3 and cond4: + dictio[key] += data.strip() + return True + return False + def node_data_to_json(self, data_array: list[str]) -> dict: res = {} + key = None + for data in data_array: + self.handle_anomalies(res, data, key) + # remove spaces and pipes at start clean_line = re.sub('^(\s|\|)*', '', data) @@ -119,9 +141,10 @@ def node_data_to_json(self, data_array: list[str]) -> dict: # remove first and last " (in case the key has more quotes inside) key = key.replace('"', '', 1) key = key[::-1].replace('"', '', 1)[::-1] + key = key.strip() self.check_key_uniqueness(res, key) - res[key.strip()] = value.strip() + res[key] = value.strip() return res @@ -133,7 +156,6 @@ def recursive_fun(self, data_tree: dict): self.check_start_node() node_name = self.line.split("+-o")[1].strip() - print("Node : ", node_name) data_tree['Name'] = node_name data_tree['Children'] = [] depth = self.line.index('o') # to identify the other nodes that have the same parent From 7484620bac12b612b720194da145a6554540429f Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Mon, 15 Sep 2025 11:28:20 +0200 Subject: [PATCH 04/17] Fixed an anomaly where a non-ascii byte stands in the IOService.txt file --- src/sysdiagnose/parsers/ioservice.py | 59 ++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py index 3319ed4..9d68075 100644 --- a/src/sysdiagnose/parsers/ioservice.py +++ b/src/sysdiagnose/parsers/ioservice.py @@ -2,8 +2,8 @@ import array from ctypes import Array +from io import BufferedReader import os -import string from tokenize import String from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger from datetime import datetime @@ -22,7 +22,7 @@ def __init__(self, config: SysdiagnoseConfig, case_id: str): super().__init__(__file__, config, case_id) def get_log_files(self) -> list: - log_file = "ioreg/IOServiceTestData2.txt" + log_file = "ioreg/IOService.txt" return [os.path.join(self.case_data_subfolder, log_file)] def execute(self) -> list | dict: @@ -51,14 +51,41 @@ def parse_file(self, file: str, data_tree: dict): So we know that the data doesn't contain the node identifier ('+-o') """ - with open(file, 'r') as f: + with open(file, 'rb') as f: self.open_file = f self.recursive_fun(data_tree) self.open_file = None def get_line(self): self.rollback_addr = self.open_file.tell() - self.line = self.open_file.readline().replace('\n', '') + self.line = self.safe_readline(self.open_file) + self.line = self.line.replace('\n', '') + + def safe_readline(self, open_file: BufferedReader, replacement_char: str = '?'): + """ + Simulates readline() in binary mode, replacing non-ASCII bytes. 
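+        e.g. the bytes b'foo\xbfbar\n' come back as the string 'foo?bar'
+        (with the default replacement_char)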
+ + This fixes an anomaly where a non-ascii (non-utf-8-) byte is present in the IOService.txt file + (line 10797 in the testdata) + """ + buffer = "" + + while True: + byte = open_file.read(1) + + if not byte: # EOF + return buffer + + if byte == b'\n': + return buffer + else: + # Check if ASCII (0–127), else replace + if byte[0] < 128: + buffer += chr(byte[0]) + else: + buffer += replacement_char[0] + #buffer.append(byte[0] if ord(byte[0]) < 128 else replacement_byte[0]) + def recursive_call(self, data_tree: dict): self.open_file.seek(self.rollback_addr) @@ -74,20 +101,11 @@ def not_empty_node_check(self): logger.error("+-o in two consecutive lines, not supposed to be possible") exit(1) - def iterate_children(self, depth: int, data_tree_list: list[dict]): - while self.line and (self.line[depth] == '|' or self.line[depth: depth+3] == '+-o'): - if self.line[depth: depth+3] == '+-o': - data_tree_list.append({}) - self.recursive_call(data_tree_list[-1]) - - else: - self.get_line() - - def check_key_uniqueness(self, dictio, key): + def check_key_uniqueness(self, dictio: dict, key: str): if dictio.get(key): logger.warning('Key is already in dictionary, data may be lost') - def fetch_node_data(self, data_tree): + def fetch_node_data(self, data_tree: dict) -> bool: node_data = [] # array of lines, to be transformed in json res = True @@ -102,7 +120,7 @@ def fetch_node_data(self, data_tree): data_tree['Data'] = self.node_data_to_json(node_data) return res - def handle_anomalies(self, dictio, data, key): + def handle_anomalies(self, dictio: dict, data: str, key: str) -> bool: """ some values overflow on the few next lines this condition assumes there is no '=' in the exceeding data @@ -148,6 +166,15 @@ def node_data_to_json(self, data_array: list[str]) -> dict: return res + def iterate_children(self, depth: int, data_tree_list: list[dict]): + while self.line and (self.line[depth] == '|' or self.line[depth: depth+3] == '+-o'): + if self.line[depth: depth+3] == '+-o': + data_tree_list.append({}) + self.recursive_call(data_tree_list[-1]) + + else: + self.get_line() + def recursive_fun(self, data_tree: dict): is_leaf = False self.get_line() From 5e03cecf40bbd5483d240e3bf0281f1c01528b71 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Mon, 15 Sep 2025 12:31:50 +0200 Subject: [PATCH 05/17] Added unit testing for ioservice parser --- .vscode/launch.json | 8 + tests/test_parsers_ioservice.py | 363 ++++++++++++++++++++++++++++++++ 2 files changed, 371 insertions(+) create mode 100644 tests/test_parsers_ioservice.py diff --git a/.vscode/launch.json b/.vscode/launch.json index a514352..a2be49f 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -316,6 +316,14 @@ "module": "sysdiagnose.__main__", "args": "-c public parse swcutil", "cwd": "${workspaceFolder}/" + }, + { + "name": "Python Debugger: parse ioservice", + "type": "debugpy", + "request": "launch", + "module": "sysdiagnose.__main__", + "args": "-c public -l DEBUG parse ioservice", + "cwd": "${workspaceFolder}/" } ] } \ No newline at end of file diff --git a/tests/test_parsers_ioservice.py b/tests/test_parsers_ioservice.py new file mode 100644 index 0000000..2243d19 --- /dev/null +++ b/tests/test_parsers_ioservice.py @@ -0,0 +1,363 @@ +from sysdiagnose.parsers.ioservice import IOServiceParser +from tests import SysdiagnoseTestCase +import unittest +import io + + +class TestParsersIOService(SysdiagnoseTestCase): + + def test_basic_structure(self): + for case_id, _ in self.sd.cases().items(): + p = IOServiceParser(self.sd.config, case_id=case_id) + 
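+            # hypothetical minimal tree mimicking ioreg's text format: '+-o'
+            # marks a node, '|' extends the parent's column, key = value pairs sit in braces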
+ # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.BytesIO(b"""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value l4" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") + + expected = { + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value l4\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" + }, + { + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" + }, + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": "\"value 52\"" + }, + "Name": "Leaf 4" + } + ], + "Data": { + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" + }, + "Name": "Node 2" + } + ], + "Data": { + "data 1": "\"value 1\"", + "data 2": "\"value 2\"" + }, + "Name": "Root node" + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_value_overflow_anomaly(self): + for case_id, _ in self.sd.cases().items(): + p = IOServiceParser(self.sd.config, case_id=case_id) + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.BytesIO(b"""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") + + expected = { + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value aaaabbbbccccdddd\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" + }, + { + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" + }, + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": "\"value 52\"" + }, + "Name": "Leaf 4" + } + ], + "Data": { + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" + }, + "Name": "Node 2" + } + ], + "Data": { + "data 1": "\"value 1\"", + "data 2": "\"value 2\"" + }, + "Name": "Root node" + 
} + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + for case_id, _ in self.sd.cases().items(): + p = IOServiceParser(self.sd.config, case_id=case_id) + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.BytesIO(b"""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") + + expected = { + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value -->?<--\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" + }, + { + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" + }, + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": "\"value 52\"" + }, + "Name": "Leaf 4" + } + ], + "Data": { + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" + }, + "Name": "Node 2" + } + ], + "Data": { + "data 1": "\"value 1\"", + "data 2": "\"value 2\"" + }, + "Name": "Root node" + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() From 25145b8a8284bf5eb5a5ef112a5da9b63260715e Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Mon, 15 Sep 2025 14:22:05 +0200 Subject: [PATCH 06/17] Fixed codestyling of ioservice and its unit testing --- src/sysdiagnose/parsers/ioservice.py | 51 +++--- tests/test_parsers_ioservice.py | 252 +++++++++++++-------------- 2 files changed, 146 insertions(+), 157 deletions(-) diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py index 9d68075..cad812d 100644 --- a/src/sysdiagnose/parsers/ioservice.py +++ b/src/sysdiagnose/parsers/ioservice.py @@ -1,13 +1,9 @@ #! 
/usr/bin/env python3 -import array -from ctypes import Array from io import BufferedReader import os -from tokenize import String -from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger -from datetime import datetime import re +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger class IOServiceParser(BaseParserInterface): @@ -16,7 +12,6 @@ class IOServiceParser(BaseParserInterface): rollback_addr = None line = None open_file = None - def __init__(self, config: SysdiagnoseConfig, case_id: str): super().__init__(__file__, config, case_id) @@ -33,12 +28,12 @@ def execute(self) -> list | dict: try: logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) self.parse_file(log_file, data_tree) - + except Exception: logger.exception("IOService parsing crashed") - + return data_tree - + def parse_file(self, file: str, data_tree: dict): """ IOService file notes @@ -50,7 +45,7 @@ def parse_file(self, file: str, data_tree: dict): So we know that the data doesn't contain the node identifier ('+-o') - """ + """ # noqa: W605 with open(file, 'rb') as f: self.open_file = f self.recursive_fun(data_tree) @@ -73,7 +68,7 @@ def safe_readline(self, open_file: BufferedReader, replacement_char: str = '?'): while True: byte = open_file.read(1) - if not byte: # EOF + if not byte: # EOF return buffer if byte == b'\n': @@ -84,8 +79,6 @@ def safe_readline(self, open_file: BufferedReader, replacement_char: str = '?'): buffer += chr(byte[0]) else: buffer += replacement_char[0] - #buffer.append(byte[0] if ord(byte[0]) < 128 else replacement_byte[0]) - def recursive_call(self, data_tree: dict): self.open_file.seek(self.rollback_addr) @@ -106,31 +99,31 @@ def check_key_uniqueness(self, dictio: dict, key: str): logger.warning('Key is already in dictionary, data may be lost') def fetch_node_data(self, data_tree: dict) -> bool: - node_data = [] # array of lines, to be transformed in json + node_data = [] # array of lines, to be transformed in json res = True while '+-o' not in self.line: if not self.line: # end of file res = False break - + node_data.append(self.line) self.get_line() data_tree['Data'] = self.node_data_to_json(node_data) return res - + def handle_anomalies(self, dictio: dict, data: str, key: str) -> bool: """ some values overflow on the few next lines this condition assumes there is no '=' in the exceeding data (which was the case up to what I saw) - p.s. : if you wonder why cond4 is necessary, it is only for + p.s. : if you wonder why cond4 is necessary, it is only for the last leaf, which has no '|' symbols. 
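         (the last leaf's data lines start with plain spaces instead of '|',
         so cond1 alone cannot tell them apart from ordinary overflow lines)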
without cond4, these lines would be seen as anomalies """ - cond1 = not re.search('^\s*\|+', data) + cond1 = not re.search(r'^\s*\|+', data) cond2 = len(data.strip()) > 0 cond3 = data.strip() not in ('{', '}') cond4 = '=' not in data @@ -139,7 +132,7 @@ def handle_anomalies(self, dictio: dict, data: str, key: str) -> bool: dictio[key] += data.strip() return True return False - + def node_data_to_json(self, data_array: list[str]) -> dict: res = {} key = None @@ -148,7 +141,7 @@ def node_data_to_json(self, data_array: list[str]) -> dict: self.handle_anomalies(res, data, key) # remove spaces and pipes at start - clean_line = re.sub('^(\s|\|)*', '', data) + clean_line = re.sub(r'^(\s|\|)*', '', data) if '=' not in clean_line: continue @@ -163,18 +156,18 @@ def node_data_to_json(self, data_array: list[str]) -> dict: self.check_key_uniqueness(res, key) res[key] = value.strip() - + return res - + def iterate_children(self, depth: int, data_tree_list: list[dict]): - while self.line and (self.line[depth] == '|' or self.line[depth: depth+3] == '+-o'): - if self.line[depth: depth+3] == '+-o': + while self.line and (self.line[depth] == '|' or self.line[depth: depth + 3] == '+-o'): + if self.line[depth: depth + 3] == '+-o': data_tree_list.append({}) self.recursive_call(data_tree_list[-1]) else: self.get_line() - + def recursive_fun(self, data_tree: dict): is_leaf = False self.get_line() @@ -185,7 +178,7 @@ def recursive_fun(self, data_tree: dict): node_name = self.line.split("+-o")[1].strip() data_tree['Name'] = node_name data_tree['Children'] = [] - depth = self.line.index('o') # to identify the other nodes that have the same parent + depth = self.line.index('o') # to identify the other nodes that have the same parent self.get_line() # check if its a leaf @@ -200,13 +193,9 @@ def recursive_fun(self, data_tree: dict): if is_leaf: self.open_file.seek(self.rollback_addr) return - + # sanity check self.not_empty_node_check() # Iterates over each child to call the current function self.iterate_children(depth, data_tree['Children']) - - - - diff --git a/tests/test_parsers_ioservice.py b/tests/test_parsers_ioservice.py index 2243d19..b402aca 100644 --- a/tests/test_parsers_ioservice.py +++ b/tests/test_parsers_ioservice.py @@ -9,7 +9,7 @@ class TestParsersIOService(SysdiagnoseTestCase): def test_basic_structure(self): for case_id, _ in self.sd.cases().items(): p = IOServiceParser(self.sd.config, case_id=case_id) - + # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') start_file = io.BytesIO(b"""+-o Root node @@ -54,59 +54,59 @@ def test_basic_structure(self): "data 52" = "value 52" } -""") - +""") # noqa: W291, W293 + expected = { "Children": [ { - "Children": [ - { "Children": [ { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value l4\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" }, { - "Children": [], - "Data": { - "data l3": "\"value l3\"", - "data l4": "\"value l4\"" + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" }, - "Name": "Leaf 2" + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": 
"\"value 52\"" + }, + "Name": "Leaf 4" } ], "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" - }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" - }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" - }, - "Name": "Node 2" + "Name": "Node 2" } ], "Data": { @@ -125,7 +125,7 @@ def test_basic_structure(self): def test_value_overflow_anomaly(self): for case_id, _ in self.sd.cases().items(): p = IOServiceParser(self.sd.config, case_id=case_id) - + # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') start_file = io.BytesIO(b"""+-o Root node @@ -174,59 +174,59 @@ def test_value_overflow_anomaly(self): "data 52" = "value 52" } -""") - +""") # noqa: W291, W293 + expected = { "Children": [ { - "Children": [ - { "Children": [ { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value aaaabbbbccccdddd\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" }, { - "Children": [], - "Data": { - "data l3": "\"value l3\"", - "data l4": "\"value aaaabbbbccccdddd\"" + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" }, - "Name": "Leaf 2" + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": "\"value 52\"" + }, + "Name": "Leaf 4" } ], "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" - }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" - }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" - }, - "Name": "Node 2" + "Name": "Node 2" } ], "Data": { @@ -245,7 +245,7 @@ def test_value_overflow_anomaly(self): def test_non_ascii_byte_anomaly(self): for case_id, _ in self.sd.cases().items(): p = IOServiceParser(self.sd.config, case_id=case_id) - + # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') start_file = io.BytesIO(b"""+-o Root node @@ -290,59 +290,59 @@ def test_non_ascii_byte_anomaly(self): "data 52" = "value 52" } -""") - +""") # noqa: W291, W293 + expected = { "Children": [ { - "Children": [ - { "Children": [ { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value -->?<--\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" }, { - "Children": [], - "Data": { 
- "data l3": "\"value l3\"", - "data l4": "\"value -->?<--\"" + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" }, - "Name": "Leaf 2" + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": "\"value 52\"" + }, + "Name": "Leaf 4" } ], "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" - }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" - }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" - }, - "Name": "Node 2" + "Name": "Node 2" } ], "Data": { From ebb74a67d492e9e1b4fe77f98deec0519e49e06e Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Tue, 16 Sep 2025 17:42:37 +0200 Subject: [PATCH 07/17] Small refactor of the resulting data structure in the ioservice parser --- src/sysdiagnose/parsers/ioservice.py | 179 ++---------------- .../utils/ioreg_parsers/string_parser.py | 9 + .../utils/ioreg_parsers/structure_parser.py | 172 +++++++++++++++++ 3 files changed, 192 insertions(+), 168 deletions(-) create mode 100644 src/sysdiagnose/utils/ioreg_parsers/string_parser.py create mode 100644 src/sysdiagnose/utils/ioreg_parsers/structure_parser.py diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py index cad812d..b0c4ec3 100644 --- a/src/sysdiagnose/parsers/ioservice.py +++ b/src/sysdiagnose/parsers/ioservice.py @@ -1,17 +1,13 @@ #! /usr/bin/env python3 -from io import BufferedReader import os -import re from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser class IOServiceParser(BaseParserInterface): description = "IOService.txt file parser" format = "json" - rollback_addr = None - line = None - open_file = None def __init__(self, config: SysdiagnoseConfig, case_id: str): super().__init__(__file__, config, case_id) @@ -21,20 +17,6 @@ def get_log_files(self) -> list: return [os.path.join(self.case_data_subfolder, log_file)] def execute(self) -> list | dict: - log_files = self.get_log_files() - data_tree = {} - - for log_file in log_files: - try: - logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) - self.parse_file(log_file, data_tree) - - except Exception: - logger.exception("IOService parsing crashed") - - return data_tree - - def parse_file(self, file: str, data_tree: dict): """ IOService file notes # Regex for +-o starting at start of file -> 1213 results @@ -46,156 +28,17 @@ def parse_file(self, file: str, data_tree: dict): So we know that the data doesn't contain the node identifier ('+-o') """ # noqa: W605 - with open(file, 'rb') as f: - self.open_file = f - self.recursive_fun(data_tree) - self.open_file = None - - def get_line(self): - self.rollback_addr = self.open_file.tell() - self.line = self.safe_readline(self.open_file) - self.line = self.line.replace('\n', '') - - def safe_readline(self, open_file: BufferedReader, replacement_char: str = '?'): - """ - Simulates readline() in binary mode, replacing non-ASCII bytes. 
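-        e.g. the bytes b'foo\xbfbar\n' come back as the string 'foo?bar'
-        (with the default replacement_char)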
- - This fixes an anomaly where a non-ascii (non-utf-8-) byte is present in the IOService.txt file - (line 10797 in the testdata) - """ - buffer = "" - - while True: - byte = open_file.read(1) - - if not byte: # EOF - return buffer - - if byte == b'\n': - return buffer - else: - # Check if ASCII (0–127), else replace - if byte[0] < 128: - buffer += chr(byte[0]) - else: - buffer += replacement_char[0] - - def recursive_call(self, data_tree: dict): - self.open_file.seek(self.rollback_addr) - self.recursive_fun(data_tree) - - def check_start_node(self): - if '+-o' not in self.line: - logger.error('This is not normal. Recursive function called on random line.') - exit(1) - - def not_empty_node_check(self): - if not self.rollback_addr: - logger.error("+-o in two consecutive lines, not supposed to be possible") - exit(1) - - def check_key_uniqueness(self, dictio: dict, key: str): - if dictio.get(key): - logger.warning('Key is already in dictionary, data may be lost') - - def fetch_node_data(self, data_tree: dict) -> bool: - node_data = [] # array of lines, to be transformed in json - res = True - - while '+-o' not in self.line: - if not self.line: # end of file - res = False - break - - node_data.append(self.line) - self.get_line() - - data_tree['Data'] = self.node_data_to_json(node_data) - return res - def handle_anomalies(self, dictio: dict, data: str, key: str) -> bool: - """ - some values overflow on the few next lines - this condition assumes there is no '=' in the exceeding data - (which was the case up to what I saw) - - p.s. : if you wonder why cond4 is necessary, it is only for - the last leaf, which has no '|' symbols. without cond4, - these lines would be seen as anomalies - """ - cond1 = not re.search(r'^\s*\|+', data) - cond2 = len(data.strip()) > 0 - cond3 = data.strip() not in ('{', '}') - cond4 = '=' not in data - - if cond1 and cond2 and cond3 and cond4: - dictio[key] += data.strip() - return True - return False - - def node_data_to_json(self, data_array: list[str]) -> dict: - res = {} - key = None - - for data in data_array: - self.handle_anomalies(res, data, key) - - # remove spaces and pipes at start - clean_line = re.sub(r'^(\s|\|)*', '', data) - - if '=' not in clean_line: - continue - - # split at the first equal only - key, value = clean_line.split('=', 1) - - # remove first and last " (in case the key has more quotes inside) - key = key.replace('"', '', 1) - key = key[::-1].replace('"', '', 1)[::-1] - key = key.strip() - - self.check_key_uniqueness(res, key) - res[key] = value.strip() - - return res - - def iterate_children(self, depth: int, data_tree_list: list[dict]): - while self.line and (self.line[depth] == '|' or self.line[depth: depth + 3] == '+-o'): - if self.line[depth: depth + 3] == '+-o': - data_tree_list.append({}) - self.recursive_call(data_tree_list[-1]) - - else: - self.get_line() - - def recursive_fun(self, data_tree: dict): - is_leaf = False - self.get_line() - - # check if we're at the start of a node - self.check_start_node() - - node_name = self.line.split("+-o")[1].strip() - data_tree['Name'] = node_name - data_tree['Children'] = [] - depth = self.line.index('o') # to identify the other nodes that have the same parent - self.get_line() - - # check if its a leaf - if self.line[depth] != '|': - is_leaf = True - - # Fetch the data of the node - if not self.fetch_node_data(data_tree): - return # EOF + log_files = self.get_log_files() + data_tree = {} - # stop if we're a leaf - if is_leaf: - self.open_file.seek(self.rollback_addr) - return + for log_file 
in log_files: + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + parser = IORegStructParser() + data_tree = parser.get_dict(log_file) - # sanity check - self.not_empty_node_check() + except Exception: + logger.exception("IOService parsing crashed") - # Iterates over each child to call the current function - self.iterate_children(depth, data_tree['Children']) + return data_tree diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py new file mode 100644 index 0000000..a5e9eaf --- /dev/null +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -0,0 +1,9 @@ + + +class IORegStringParser: + def __init__(self): + pass + + def get_parsed(self, input_string: str): + list_of_elements = input_string.split(',') + return {"a": "b", "c": "d"} diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py new file mode 100644 index 0000000..ff06c10 --- /dev/null +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -0,0 +1,172 @@ +from sysdiagnose.utils.base import logger +from sysdiagnose.utils.ioreg_parsers.string_parser import IORegStringParser +import re + +class IORegStructParser: + rollback_addr = None + line = None + + def __init__(self): + pass + + def get_dict(self, file_path): + data_tree = {} + + with open(file_path, 'r', errors='backslashreplace') as f: + self.open_file = f + self.recursive_fun(data_tree) + + return data_tree + + def get_line(self): + self.rollback_addr = self.open_file.tell() + self.line = self.open_file.readline() + self.line = self.line.replace('\n', '') + + def recursive_call(self, data_tree: dict): + self.open_file.seek(self.rollback_addr) + self.recursive_fun(data_tree) + + def check_start_node(self): + if '+-o' not in self.line: + logger.error('This is not normal. Recursive function called on random line.') + exit(1) + + def not_empty_node_check(self): + if not self.rollback_addr: + logger.error("+-o in two consecutive lines, not supposed to be possible") + exit(1) + + def check_key_uniqueness(self, dictio: dict, key: str): + if dictio.get(key): + logger.warning('Key is already in dictionary, data may be lost') + + def fetch_node_data(self, data_tree: dict) -> bool: + node_data = [] # array of lines, to be transformed in json + res = True + + while '+-o' not in self.line: + if not self.line: # end of file + res = False + break + + node_data.append(self.line) + self.get_line() + + data_dict = self.node_data_to_json(node_data) + self.dict_update(data_tree, data_dict) + + return res + + def dict_update(self, main_dict, data_dict): + data_dict_len = len(data_dict) + main_dict_len = len(main_dict) + main_dict.update(data_dict) + + if len(main_dict) != data_dict_len + main_dict_len: + logger.warning("One of the keys was already present in the json, data loss may occur") + + def parse_title(self): + if "+-o" not in self.line: + logger.warning("'non-title' line given to title parser, should not happen") + return "" + + whole_title = self.line.split("+-o", 1)[1].strip() + + if " format, to invesstigate") + + name = whole_title.split(' bool: + """ + some values overflow on the few next lines + this condition assumes there is no '=' in the exceeding data + (which was the case up to what I saw) + + p.s. : if you wonder why cond4 is necessary, it is only for + the last leaf, which has no '|' symbols. 
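+        (the last leaf's data lines start with plain spaces instead of '|',
+        so cond1 alone cannot tell them apart from ordinary overflow lines)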
without cond4, + these lines would be seen as anomalies + """ + cond1 = not re.search(r'^\s*\|+', data) + cond2 = len(data.strip()) > 0 + cond3 = data.strip() not in ('{', '}') + cond4 = '=' not in data + + if cond1 and cond2 and cond3 and cond4: + dictio[key] += data.strip() + return True + return False + + def node_data_to_json(self, data_array: list[str]) -> dict: + res = {} + key = None + + for data in data_array: + self.handle_anomalies(res, data, key) + + # remove spaces and pipes at start + clean_line = re.sub(r'^(\s|\|)*', '', data) + + if '=' not in clean_line: + continue + + # split at the first equal only + key, value = clean_line.split('=', 1) + + # remove first and last " (in case the key has more quotes inside) + key = key.replace('"', '', 1) + key = key[::-1].replace('"', '', 1)[::-1] + key = key.strip() + + self.check_key_uniqueness(res, key) + res[key] = value.strip() + + return res + + def iterate_children(self, depth: int, data_tree: dict): + while self.line and (self.line[depth] == '|' or self.line[depth: depth + 3] == '+-o'): + if self.line[depth: depth + 3] == '+-o': + name = self.parse_title()[0] + self.check_key_uniqueness(data_tree, name) + data_tree[name] = {} + self.recursive_call(data_tree[name]) + + else: + self.get_line() + + def recursive_fun(self, data_tree: dict): + is_leaf = False + self.get_line() + + # check if we're at the start of a node + self.check_start_node() + + additional_data = self.parse_title()[1] + additional_data = IORegStringParser().get_parsed(additional_data) + self.dict_update(data_tree, additional_data) + + depth = self.line.index('o') # to identify the other nodes that have the same parent + self.get_line() + + # check if its a leaf + if self.line[depth] != '|': + is_leaf = True + + # Fetch the data of the node + if not self.fetch_node_data(data_tree): + return # EOF + + # stop if we're a leaf + if is_leaf: + self.open_file.seek(self.rollback_addr) + return + + # sanity check + self.not_empty_node_check() + + # Iterates over each child to call the current function + self.iterate_children(depth, data_tree) From 2de2208e0a3ba288b91a8d93aee21cb84cefd94a Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Wed, 17 Sep 2025 15:53:35 +0200 Subject: [PATCH 08/17] Added basic value string parsers, used for io* parsers --- src/sysdiagnose/parsers/ioservice.py | 4 +- .../utils/ioreg_parsers/string_parser.py | 66 +++++++++++++++++-- .../utils/ioreg_parsers/structure_parser.py | 10 +-- tests/test_string_parser.py | 0 4 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 tests/test_string_parser.py diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py index b0c4ec3..26a84ef 100644 --- a/src/sysdiagnose/parsers/ioservice.py +++ b/src/sysdiagnose/parsers/ioservice.py @@ -35,8 +35,8 @@ def execute(self) -> list | dict: for log_file in log_files: try: logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) - parser = IORegStructParser() - data_tree = parser.get_dict(log_file) + p = IORegStructParser() + data_tree = p.parse(log_file) except Exception: logger.exception("IOService parsing crashed") diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index a5e9eaf..fed198d 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -1,9 +1,63 @@ +import re +from enum import Enum +class DataType(Enum): + XML_LIKE = 1 + LIST = 2 -class 
IORegStringParser: - def __init__(self): - pass - def get_parsed(self, input_string: str): - list_of_elements = input_string.split(',') - return {"a": "b", "c": "d"} +def parse_list(input_string: str): + input = re.search(r'\((.+,.+)\)', input_string).group(1) + list_of_elements = input.split(',') + res = [] + + for element in list_of_elements: + res.append(element.strip()) + + return res + + +def parse_xml_like(input_string: str): + input = re.search(r'<(.+)>', input_string).group(1) + list_of_elements = input.split(',') + res = {} + + for element in list_of_elements: + element = element.strip() + key = element.split(' ', 1)[0] + value = element.split(' ', 1)[1] + # TODO check key uniqueness + res[key] = value + + return res + +def detect_type(input: str) -> DataType: + if re.search(r'<.+>', input): + return DataType.XML_LIKE + + if re.search(r'\(.+,.+\)', input): + return DataType.LIST + +def parse(input_string: str): + input_string = input_string.strip() + type = detect_type(input_string) + + match type: + case DataType.XML_LIKE: + parse_xml_like(input_string) + + case DataType.LIST: + parse_list(input_string) + + case _: + print('not found') + + +test_1 = '' +test_2 = '' +test_3 = '' +test_4 = '' +test_5 = ', retain 52>' +test_6 = '' + +parse(test_5) diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py index ff06c10..0dc1a4e 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -1,5 +1,5 @@ from sysdiagnose.utils.base import logger -from sysdiagnose.utils.ioreg_parsers.string_parser import IORegStringParser +from sysdiagnose.utils.ioreg_parsers import string_parser import re class IORegStructParser: @@ -9,7 +9,7 @@ class IORegStructParser: def __init__(self): pass - def get_dict(self, file_path): + def parse(self, file_path): data_tree = {} with open(file_path, 'r', errors='backslashreplace') as f: @@ -39,7 +39,7 @@ def not_empty_node_check(self): def check_key_uniqueness(self, dictio: dict, key: str): if dictio.get(key): - logger.warning('Key is already in dictionary, data may be lost') + logger.warning('Key is already in dictionary, data may be lost\n\tKey : ' + key) def fetch_node_data(self, data_tree: dict) -> bool: node_data = [] # array of lines, to be transformed in json @@ -77,7 +77,7 @@ def parse_title(self): logger.warning("Title doesnt respect the usual format, to invesstigate") name = whole_title.split(' Date: Thu, 18 Sep 2025 15:38:15 +0200 Subject: [PATCH 09/17] Added recursive version of string_parser, still rudimentary --- .../utils/ioreg_parsers/string_parser.py | 163 +++++++++++++++--- 1 file changed, 138 insertions(+), 25 deletions(-) diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index fed198d..f4587ad 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -1,14 +1,102 @@ import re from enum import Enum +from sysdiagnose.utils.base import logger +import uuid class DataType(Enum): XML_LIKE = 1 LIST = 2 - - -def parse_list(input_string: str): - input = re.search(r'\((.+,.+)\)', input_string).group(1) - list_of_elements = input.split(',') + UNKNOWN = 3 + +class Detect: + _best_len = 0 + _best_type = DataType.UNKNOWN + _best_whole = "" # whole match, for example : + _best_content = "" # content, for example : data1, data2 + _found = False + + def __init__(self, input_string: 
str): + self.detect_type(input_string) + + def detect_type(self, input: str): + hit = re.search(r'<(.*)>', input) + if hit and len(hit.group(0)) > self.len: + self.assign_best(hit, DataType.XML_LIKE) + + hit = re.search(r'\((.+,.+)\)', input) + if hit and len(hit.group(0)) > self.len: + self.assign_best(hit, DataType.LIST) + + def assign_best(self, hit: re.Match, type: DataType): + self._best_len = len(hit.group(0)) + self._best_type = type + self._best_whole = hit.group(0) + self._best_content = hit.group(1) + self._found = True + + @property + def len(self) -> int: + return self._best_len + + @property + def type(self) -> DataType: + return self._best_type + + @property + def whole_match(self) -> str: + return self._best_whole + + @property + def content(self) -> str: + return self._best_content + + @property + def found(self) -> bool: + return self._found + + +def generate_tag() -> str: + return str(uuid.uuid4()) + +def check_anomaly(s: str, tag: str): + diff = s.replace(tag, '') + + if tag in s and diff: + logger.warning("Anomaly, some data was right next to " \ + "the struct (without space), this data is thus lost : ", diff) + +def list_replace(tagged_struct: list, tag: str, st: dict | list): + for i in range(len(tagged_struct)): + elem = tagged_struct[i] + if type(elem) == str and tag in elem: + check_anomaly(elem, tag) + tagged_struct[i] = st + +def dict_replace(tagged_struct: dict, tag: str, st: dict | list): + for key in tagged_struct: + elem = tagged_struct[key] + if type(elem) == str and tag in elem: + check_anomaly(elem, tag) + tagged_struct[key] = st + +def struct_replace(tagged_struct: dict | list, type: DataType, tag: str, st: dict | list): + try: + match type: + case DataType.LIST: + list_replace(tagged_struct, tag, st) + + case DataType.XML_LIKE: + dict_replace(tagged_struct, tag, st) + + case _: + pass + + except: + logger.error("When rebuilding the struct in struct_replace, the argument 'type' doesn't correspond to the given tagged_struct") + exit(1) + +def parse_list(input_string: str) -> list: + list_of_elements = input_string.split(',') res = [] for element in list_of_elements: @@ -16,10 +104,8 @@ def parse_list(input_string: str): return res - -def parse_xml_like(input_string: str): - input = re.search(r'<(.+)>', input_string).group(1) - list_of_elements = input.split(',') +def parse_xml_like(input_string: str) -> dict: + list_of_elements = input_string.split(',') res = {} for element in list_of_elements: @@ -31,33 +117,60 @@ def parse_xml_like(input_string: str): return res -def detect_type(input: str) -> DataType: - if re.search(r'<.+>', input): - return DataType.XML_LIKE - - if re.search(r'\(.+,.+\)', input): - return DataType.LIST - -def parse(input_string: str): - input_string = input_string.strip() - type = detect_type(input_string) - +def parse_type(input_string: str, type: DataType): match type: case DataType.XML_LIKE: - parse_xml_like(input_string) + return parse_xml_like(input_string) case DataType.LIST: - parse_list(input_string) + return parse_list(input_string) case _: print('not found') +def recursive_parse(input: str): + input = input.strip() + hit = Detect(input) + tagged_content = hit.content + tag_map = {} + + # recursion stop + if not hit.found: + return "", "" + + # recursion + sub_string, sub_struct = recursive_parse(hit.content) + + if not sub_string: + # form basic struct + tagged_struct = parse_type(tagged_content, hit.type) + return hit.whole_match, tagged_struct + + # replace struct by a unique tag + tag = generate_tag() + tagged_content = 
tagged_content.replace(sub_string, tag) + + # link tag with its computed struct + tag_map[tag] = sub_struct + + # form basic struct + tagged_struct = parse_type(tagged_content, hit.type) + + # include recursively computed struct + struct_replace(tagged_struct, hit.type, tag, tag_map[tag]) + + return hit.whole_match, tagged_struct + +def parse(input_string: str): + return recursive_parse(input_string)[1] + test_1 = '' test_2 = '' test_3 = '' test_4 = '' -test_5 = ', retain 52>' -test_6 = '' +test_5 = '' +test_6 = ', user , retain 52>' +test_7 = '' -parse(test_5) +print(parse(test_7)) From 49b1982f2b95235db7b0df0878557d0dadd60fec Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Thu, 18 Sep 2025 20:28:01 +0200 Subject: [PATCH 10/17] Refactor of algo logic in utils/string_parser --- .../utils/ioreg_parsers/string_parser.py | 154 ++++++++++-------- 1 file changed, 86 insertions(+), 68 deletions(-) diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index f4587ad..0036386 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -9,7 +9,7 @@ class DataType(Enum): UNKNOWN = 3 class Detect: - _best_len = 0 + _best_len = float('inf') _best_type = DataType.UNKNOWN _best_whole = "" # whole match, for example : _best_content = "" # content, for example : data1, data2 @@ -19,12 +19,14 @@ def __init__(self, input_string: str): self.detect_type(input_string) def detect_type(self, input: str): - hit = re.search(r'<(.*)>', input) - if hit and len(hit.group(0)) > self.len: + # find the smallest + hit = re.search(r'<((?!.*<.*).*?)>', input) + if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.XML_LIKE) - hit = re.search(r'\((.+,.+)\)', input) - if hit and len(hit.group(0)) > self.len: + # find the smallest + hit = re.search(r'\(((?!.*\(.*).+?,.+?)\)', input) + if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.LIST) def assign_best(self, hit: re.Match, type: DataType): @@ -37,19 +39,19 @@ def assign_best(self, hit: re.Match, type: DataType): @property def len(self) -> int: return self._best_len - + @property def type(self) -> DataType: return self._best_type - + @property def whole_match(self) -> str: return self._best_whole - + @property def content(self) -> str: return self._best_content - + @property def found(self) -> bool: return self._found @@ -58,42 +60,19 @@ def found(self) -> bool: def generate_tag() -> str: return str(uuid.uuid4()) + def check_anomaly(s: str, tag: str): diff = s.replace(tag, '') if tag in s and diff: - logger.warning("Anomaly, some data was right next to " \ - "the struct (without space), this data is thus lost : ", diff) + logger.warning("Warning : Anomaly: some data was right next to " + "the struct (without space), this data is thus lost\n---> " + diff) -def list_replace(tagged_struct: list, tag: str, st: dict | list): - for i in range(len(tagged_struct)): - elem = tagged_struct[i] - if type(elem) == str and tag in elem: - check_anomaly(elem, tag) - tagged_struct[i] = st -def dict_replace(tagged_struct: dict, tag: str, st: dict | list): - for key in tagged_struct: - elem = tagged_struct[key] - if type(elem) == str and tag in elem: - check_anomaly(elem, tag) - tagged_struct[key] = st - -def struct_replace(tagged_struct: dict | list, type: DataType, tag: str, st: dict | list): - try: - match type: - case DataType.LIST: - list_replace(tagged_struct, tag, st) - - case DataType.XML_LIKE: - 
dict_replace(tagged_struct, tag, st) - - case _: - pass - - except: - logger.error("When rebuilding the struct in struct_replace, the argument 'type' doesn't correspond to the given tagged_struct") - exit(1) +def check_key_uniqueness(dictio: dict, key: str): + if dictio.get(key): + logger.warning('Warning : Key is already in dictionary, data may be lost\n---> ' + key) + def parse_list(input_string: str) -> list: list_of_elements = input_string.split(',') @@ -104,6 +83,7 @@ def parse_list(input_string: str) -> list: return res + def parse_xml_like(input_string: str) -> dict: list_of_elements = input_string.split(',') res = {} @@ -112,11 +92,12 @@ def parse_xml_like(input_string: str) -> dict: element = element.strip() key = element.split(' ', 1)[0] value = element.split(' ', 1)[1] - # TODO check key uniqueness + check_key_uniqueness(res, key) res[key] = value return res + def parse_type(input_string: str, type: DataType): match type: case DataType.XML_LIKE: @@ -128,41 +109,77 @@ def parse_type(input_string: str, type: DataType): case _: print('not found') -def recursive_parse(input: str): - input = input.strip() - hit = Detect(input) - tagged_content = hit.content - tag_map = {} - # recursion stop - if not hit.found: - return "", "" +def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): + for key in final_struct: + elem = final_struct[key] - # recursion - sub_string, sub_struct = recursive_parse(hit.content) + if isinstance(elem, str) and tag in elem: + check_anomaly(elem, tag) + final_struct[key] = constructed + return True - if not sub_string: - # form basic struct - tagged_struct = parse_type(tagged_content, hit.type) - return hit.whole_match, tagged_struct + elif isinstance(elem, list): + if resolve_tag_list(elem, tag, constructed): + return True + + return False - # replace struct by a unique tag - tag = generate_tag() - tagged_content = tagged_content.replace(sub_string, tag) - # link tag with its computed struct - tag_map[tag] = sub_struct +def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): + for i in range(len(final_struct)): + elem = final_struct[i] + + if isinstance(elem, str) and tag in elem: + check_anomaly(elem, tag) + final_struct[i] = constructed + return True + + elif isinstance(elem, dict): + if resolve_tag_dict(elem, tag, constructed): + return True + + return False + + +def resolve_tag(final_struct: dict | list, tag: str, constructed: dict | list): + if isinstance(final_struct, dict): + resolve_tag_dict(final_struct, tag, constructed) + + elif isinstance(final_struct, list): + resolve_tag_list(final_struct, tag, constructed) + + else: + logger.error('Error : struct type not found') + exit(1) + + +def parse(data_string: str): + data_string = data_string.strip() + hit = Detect(data_string) + final_struct = None + + # recursion stop + if not hit.found: + return None # form basic struct - tagged_struct = parse_type(tagged_content, hit.type) + constructed = parse_type(hit.content, hit.type) + + # replace struct by an unique tag + tag = generate_tag() + data_string = data_string.replace(hit.whole_match, tag) - # include recursively computed struct - struct_replace(tagged_struct, hit.type, tag, tag_map[tag]) + # recursion + final_struct = parse(data_string) - return hit.whole_match, tagged_struct + # reconstruct data structure + if not final_struct: + final_struct = constructed # at the root + else: + resolve_tag(final_struct, tag, constructed) -def parse(input_string: str): - return recursive_parse(input_string)[1] + return 
final_struct test_1 = '' @@ -170,7 +187,8 @@ def parse(input_string: str): test_3 = '' test_4 = '' test_5 = '' -test_6 = ', user , retain 52>' -test_7 = '' +test_6 = ', 0x300000), retain 52>' +test_7 = ', user , retain 52>' +test_8 = '), otherkey (otherval, otherval2)>' -print(parse(test_7)) +print(parse(test_8)) From 61d386ca1ad76431207bcf4ae4526ff9f3288dfc Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Fri, 19 Sep 2025 17:38:40 +0200 Subject: [PATCH 11/17] Enhanced string_parser + Added solid tests + fixes here and there --- .../utils/ioreg_parsers/string_parser.py | 87 ++++++++++++++----- .../utils/ioreg_parsers/structure_parser.py | 3 +- tests/test_string_parser.py | 60 +++++++++++++ 3 files changed, 125 insertions(+), 25 deletions(-) diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index 0036386..5242b9d 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -6,7 +6,8 @@ class DataType(Enum): XML_LIKE = 1 LIST = 2 - UNKNOWN = 3 + STRING = 3 + UNKNOWN = 4 class Detect: _best_len = float('inf') @@ -19,16 +20,34 @@ def __init__(self, input_string: str): self.detect_type(input_string) def detect_type(self, input: str): - # find the smallest - hit = re.search(r'<((?!.*<.*).*?)>', input) + """ Note on the match types + + XML_LIKE : data inside < > + + LIST : data in parentheses with at least one comma + + STRING : parentheses that dont contain any comma. + example : I'm good at coding (not really) <-- shouldn't be a list, simply text + + """ # noqa: W605 + + # find xml like dict ex : + hit = re.search(r'<([^<>]*)>', input) if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.XML_LIKE) - # find the smallest - hit = re.search(r'\(((?!.*\(.*).+?,.+?)\)', input) + # find list in parentheses ex : (a, b, c) + hit = re.search(r'\(([^()]*,[^()]*)\)', input) if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.LIST) + # find simple parentheses without ',' ex : (hello world) + hit = re.search(r'(\([^,)(]*\))', input) + if hit and len(hit.group(0)) < self._best_len: + self.assign_best(hit, DataType.STRING) + + self.warn_unknown_struct(input) + def assign_best(self, hit: re.Match, type: DataType): self._best_len = len(hit.group(0)) self._best_type = type @@ -36,6 +55,18 @@ def assign_best(self, hit: re.Match, type: DataType): self._best_content = hit.group(1) self._found = True + def warn_unknown_struct(self, input: str): + main_cond = self._best_type is DataType.UNKNOWN + cond_1 = '<' in input and '>' in input + cond_2 = '(' in input and ')' in input + cond_3 = '[' in input and ']' in input + cond_4 = '{' in input and '}' in input + + if (main_cond and (cond_1 or cond_2 or cond_3 or cond_4)): + logger.warning('Warning : A structure might have been recognized ' + 'in here, if so please consider adding it to the ' + 'string_parser.py file\n---> ' + input) + @property def len(self) -> int: return self._best_len @@ -91,7 +122,8 @@ def parse_xml_like(input_string: str) -> dict: for element in list_of_elements: element = element.strip() key = element.split(' ', 1)[0] - value = element.split(' ', 1)[1] + value = element.split(' ', 1)[1].strip() + # TODO if only a key is present, add true as value check_key_uniqueness(res, key) res[key] = value @@ -106,8 +138,12 @@ def parse_type(input_string: str, type: DataType): case DataType.LIST: return parse_list(input_string) + case DataType.STRING: + return input_string + case 
_: - print('not found') + logger.error("Error : Type not found in parse_type(). (Note : " + "you probably forgot to add it to the match case)") def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): @@ -115,14 +151,21 @@ def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): elem = final_struct[key] if isinstance(elem, str) and tag in elem: - check_anomaly(elem, tag) - final_struct[key] = constructed + if isinstance(constructed, str): + final_struct[key] = final_struct[key].replace(tag, constructed) + else: + check_anomaly(elem, tag) + final_struct[key] = constructed return True elif isinstance(elem, list): if resolve_tag_list(elem, tag, constructed): return True + elif isinstance(elem, dict): + if resolve_tag_dict(elem, tag, constructed): + return True + return False @@ -130,11 +173,19 @@ def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): for i in range(len(final_struct)): elem = final_struct[i] + # TODO repetition with resolve_tag_dict, put in a func if isinstance(elem, str) and tag in elem: - check_anomaly(elem, tag) - final_struct[i] = constructed + if isinstance(constructed, str): + final_struct[i] = final_struct[i].replace(tag, constructed) + else: + check_anomaly(elem, tag) + final_struct[i] = constructed return True + elif isinstance(elem, list): + if resolve_tag_list(elem, tag, constructed): + return True + elif isinstance(elem, dict): if resolve_tag_dict(elem, tag, constructed): return True @@ -168,7 +219,7 @@ def parse(data_string: str): # replace struct by an unique tag tag = generate_tag() - data_string = data_string.replace(hit.whole_match, tag) + data_string = data_string.replace(hit.whole_match, tag, 1) # recursion final_struct = parse(data_string) @@ -180,15 +231,3 @@ def parse(data_string: str): resolve_tag(final_struct, tag, constructed) return final_struct - - -test_1 = '' -test_2 = '' -test_3 = '' -test_4 = '' -test_5 = '' -test_6 = ', 0x300000), retain 52>' -test_7 = ', user , retain 52>' -test_8 = '), otherkey (otherval, otherval2)>' - -print(parse(test_8)) diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py index 0dc1a4e..ec655e6 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -54,6 +54,7 @@ def fetch_node_data(self, data_tree: dict) -> bool: self.get_line() data_dict = self.node_data_to_json(node_data) + # TODO parse each value self.dict_update(data_tree, data_dict) return res @@ -146,7 +147,7 @@ def recursive_fun(self, data_tree: dict): self.check_start_node() additional_data = self.parse_title()[1] - additional_data = string_parser.get_parsed(additional_data) + additional_data = string_parser.parse(additional_data) self.dict_update(data_tree, additional_data) depth = self.line.index('o') # to identify the other nodes that have the same parent diff --git a/tests/test_string_parser.py b/tests/test_string_parser.py index e69de29..f82cf02 100644 --- a/tests/test_string_parser.py +++ b/tests/test_string_parser.py @@ -0,0 +1,60 @@ +from tests import SysdiagnoseTestCase +import unittest +import sysdiagnose.utils.ioreg_parsers.string_parser as sp + + +class TestStringParser(SysdiagnoseTestCase): + + test_list = [ + '', + '(li1, li2, li3, li4)', + '< k1 v1 , k2 v2, k3 v3 ,k4 v4 >', + '>', + '( li 1, li 2 , li3)', + '', + '', + ' , k4 (li111, li222, li333) >', + '), m >', + ' ,k3 (>, (li111), (li8, li9)) , k4 (li111, li222, li333) >' + ] + + 
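These inputs are paired index-by-index with the expected structures listed next, so the two tables must stay the same length. A hedged variant of the test loop that makes any drift obvious and keeps reporting past the first failure (assuming SysdiagnoseTestCase is a unittest.TestCase, and sp imported as in this file):

    def test_parsing(self):
        # fail fast if the fixture tables fall out of sync
        self.assertEqual(len(self.test_list), len(self.expected_parsed))
        for test_val, expected in zip(self.test_list, self.expected_parsed):
            # subTest reports every failing input, not just the first one
            with self.subTest(input=test_val):
                self.assertEqual(sp.parse(test_val), expected)

assertEqual also prints a structural diff on failure, which assertTrue(result == expected) cannot do.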
expected_parsed = [ + {'key': 'val', 'k2': 'v2'}, + ['li1', 'li2', 'li3', 'li4'], + {'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, + {'k1': {'k11': 'v11'}}, + ['li 1', 'li 2', 'li3'], + {'k1': 'v1', 'k2': 'v2', 'k:3': ['li1', 'li2', 'li3', 'li4'], 'k4': 'v4'}, + {'k1': 'v1', 'k2': ['li1', 'li2', 'li3', 'li4'], 'k3': ['li11', 'li22'], 'k4': ['li111', 'li222', 'li333']}, + {'k1': 'v1', 'k2': ['li1', 'li2', 'li3', 'li4'], 'k3': {'k11': 'v11', 'k22': 'v22'}, 'k4': ['li111', 'li222', 'li333']}, + {'l': ['1', '2', {'k': ['', '', '', '']}], 'm': {'g': '()', 'k': ['', ''], 'm': ['()', '(())']}}, + {'k1': 'v1', 'k2': {'k11': 'v11', 'k22': 'v22'}, 'k3': [{'k111': {'a': 'b', 'c': ['l1', 'l2']}}, '(li111)', ['li8', 'li9']], 'k4': ['li111', 'li222', 'li333']} + ] + + expected_detect = [ + ('key val, k2 v2', sp.DataType.XML_LIKE), + ('li1, li2, li3, li4', sp.DataType.LIST), + (' k1 v1 , k2 v2, k3 v3 ,k4 v4 ', sp.DataType.XML_LIKE), + ('k11 v11', sp.DataType.XML_LIKE), + (' li 1, li 2 , li3', sp.DataType.LIST), + ('li1 , li2 ,li3, li4 ', sp.DataType.LIST), + ('li1 , li2 ,li3, li4 ', sp.DataType.LIST), + ('k11 v11,k22 v22', sp.DataType.XML_LIKE), + ('()', sp.DataType.STRING), + ('(li111)', sp.DataType.STRING) + ] + + def test_detect(self): + for test_val, (exp_cont, exp_type) in zip(self.test_list, self.expected_detect): + d = sp.Detect(test_val) + self.assertTrue(d.content == exp_cont) + self.assertTrue(d.type == exp_type) + + def test_parsing(self): + for test_val, expected in zip(self.test_list, self.expected_parsed): + result = sp.parse(test_val) + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() From 5c81d1cc983f86fc397c692d7f51f095d4cbada0 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Mon, 22 Sep 2025 13:34:36 +0200 Subject: [PATCH 12/17] string_parser now fully working on the testfile, quoted values not optimized yet --- .../utils/ioreg_parsers/string_parser.py | 128 ++++++++++++++---- .../utils/ioreg_parsers/structure_parser.py | 60 ++++++-- tests/test_string_parser.py | 8 +- 3 files changed, 157 insertions(+), 39 deletions(-) diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index 5242b9d..3fc928a 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -4,10 +4,11 @@ import uuid class DataType(Enum): - XML_LIKE = 1 - LIST = 2 - STRING = 3 - UNKNOWN = 4 + XML_DICT = 1 + CURLY_DICT = 2 + LIST = 3 + STRING = 4 + UNKNOWN = 5 class Detect: _best_len = float('inf') @@ -22,7 +23,10 @@ def __init__(self, input_string: str): def detect_type(self, input: str): """ Note on the match types - XML_LIKE : data inside < > + XML_DICT : data inside <> + excluded : <> , < > , < > + + CURLY_DICT : like xml_dict but with {} instead of <> LIST : data in parentheses with at least one comma @@ -32,12 +36,17 @@ def detect_type(self, input: str): """ # noqa: W605 # find xml like dict ex : - hit = re.search(r'<([^<>]*)>', input) + hit = self.find_smallest(r'<([^<>]*[^\s<>][^<>]*)>', input) + if hit and len(hit.group(0)) < self._best_len: + self.assign_best(hit, DataType.XML_DICT) + + # find dict in {} ex : {key1=val1, k2=v2} + hit = self.find_smallest(r'{([^{}]*)}', input) if hit and len(hit.group(0)) < self._best_len: - self.assign_best(hit, DataType.XML_LIKE) + self.assign_best(hit, DataType.CURLY_DICT) # find list in parentheses ex : (a, b, c) - hit = re.search(r'\(([^()]*,[^()]*)\)', input) + hit = 
self.find_smallest(r'\(([^()]*,[^()]*)\)', input)
         if hit and len(hit.group(0)) < self._best_len:
             self.assign_best(hit, DataType.LIST)
 
@@ -46,6 +55,16 @@ def detect_type(self, input: str):
         if hit and len(hit.group(0)) < self._best_len:
             self.assign_best(hit, DataType.STRING)
 
+        # find text in square brackets, without ',' nor '=' ex : [hello world]
+        hit = re.search(r'(\[[^,=\[\]]*\])', input)
+        if hit and len(hit.group(0)) < self._best_len:
+            self.assign_best(hit, DataType.STRING)
+
+        # find simple double-quotes ex : "hello world"
+        hit = re.search(r'("[^"]*")', input)
+        if hit and len(hit.group(0)) < self._best_len:
+            self.assign_best(hit, DataType.STRING)
+
         self.warn_unknown_struct(input)
 
     def assign_best(self, hit: re.Match, type: DataType):
@@ -55,14 +74,21 @@
         self._best_content = hit.group(1)
         self._found = True
 
+    def find_smallest(self, regex: str, data: str):
+        pattern = re.compile(regex)
+        matches = list(pattern.finditer(data))
+        if not matches:
+            return None
+        return min(matches, key=lambda m: len(m.group(0)))
+
     def warn_unknown_struct(self, input: str):
         main_cond = self._best_type is DataType.UNKNOWN
+        cond_exceptions = input != '{}' and input != '<>' and input != '()'
         cond_1 = '<' in input and '>' in input
         cond_2 = '(' in input and ')' in input
-        cond_3 = '[' in input and ']' in input
-        cond_4 = '{' in input and '}' in input
+        cond_3 = '{' in input and '}' in input
 
-        if (main_cond and (cond_1 or cond_2 or cond_3 or cond_4)):
+        if (main_cond and cond_exceptions and (cond_1 or cond_2 or cond_3)):
             logger.warning('Warning : A structure might have been recognized '
                            'in here, if so please consider adding it to the '
                            'string_parser.py file\n---> ' + input)
 
@@ -94,11 +120,17 @@ def generate_tag() -> str:
 
 def check_anomaly(s: str, tag: str):
     diff = s.replace(tag, '')
+    structured = s.replace(tag, '[STRUCT]')
+    # cases we don't have to warn about.
ex : (()) is same as - if tag in s and diff: + if tag in s and diff and not is_redundent_syntax_regex(diff): logger.warning("Warning : Anomaly: some data was right next to " - "the struct (without space), this data is thus lost\n---> " + diff) + "the struct (without space), this data is thus lost\n---> " + structured) +def is_redundent_syntax_regex(s: str): + """ If we have for example ([ ]) around a struct, we consider it useless + Example : "[()]" is the same as """ + return re.search(r'^[(){}\[\]<>""]+$', s) def check_key_uniqueness(dictio: dict, key: str): if dictio.get(key): @@ -115,15 +147,27 @@ def parse_list(input_string: str) -> list: return res -def parse_xml_like(input_string: str) -> dict: +def parse_dict(input_string: str, separator: str) -> dict: list_of_elements = input_string.split(',') res = {} + if list_of_elements == ['']: + return res + for element in list_of_elements: element = element.strip() - key = element.split(' ', 1)[0] - value = element.split(' ', 1)[1].strip() - # TODO if only a key is present, add true as value + splitted = element.split(separator, 1) + key = splitted[0] + + # value is true/false if there is only a key + if len(splitted) > 1: + value = splitted[1].strip() + elif key[0] == '!': + value = 'false' + key = key[1:] + else: + value = 'true' + check_key_uniqueness(res, key) res[key] = value @@ -132,8 +176,11 @@ def parse_xml_like(input_string: str) -> dict: def parse_type(input_string: str, type: DataType): match type: - case DataType.XML_LIKE: - return parse_xml_like(input_string) + case DataType.XML_DICT: + return parse_dict(input_string, ' ') + + case DataType.CURLY_DICT: + return parse_dict(input_string, '=') case DataType.LIST: return parse_list(input_string) @@ -158,6 +205,17 @@ def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): final_struct[key] = constructed return True + elif isinstance(key, str) and tag in key: + if isinstance(constructed, str): + new_key = key.replace(tag, constructed) + value = final_struct[key] + del final_struct[key] + final_struct[new_key] = value + else: + logger.error("Error : Trying to use a struct as a key in a dict") + final_struct[key] = constructed + return True + elif isinstance(elem, list): if resolve_tag_list(elem, tag, constructed): return True @@ -193,20 +251,44 @@ def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): return False -def resolve_tag(final_struct: dict | list, tag: str, constructed: dict | list): +def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | list | str): if isinstance(final_struct, dict): resolve_tag_dict(final_struct, tag, constructed) elif isinstance(final_struct, list): resolve_tag_list(final_struct, tag, constructed) + # TODO struct in string doesnt work, for example () + elif isinstance(final_struct, str): + if not isinstance(constructed, str): + if final_struct.replace(tag, "") == '()': + final_struct = constructed + else: + user_friendly = final_struct.replace(tag, "[STRUCT]") + lost_data = final_struct.replace(tag, "") + if not is_redundent_syntax_regex(lost_data): + logger.warning("Warning : trying to incorporate dict/list in a string :\n---> " + user_friendly) + final_struct = constructed + else: + final_struct = final_struct.replace(tag, constructed) + else: logger.error('Error : struct type not found') exit(1) + # return is necessary bcs strings are not passed by reference in python + return final_struct + -def parse(data_string: str): +def parse(data_string: str, first_run: bool = True): 
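The key-only convention in parse_dict above is easy to miss: a bare key becomes 'true' and a '!'-prefixed key becomes 'false'. Two worked calls, assuming parse_dict exactly as defined in this patch (the concrete keys and values are invented for illustration):

    parse_dict('depth 1, wake, !sleep', ' ')
    # -> {'depth': '1', 'wake': 'true', 'sleep': 'false'}

    parse_dict('IOClass=IOUSBHostDevice, locationID=0x1234', '=')
    # -> {'IOClass': 'IOUSBHostDevice', 'locationID': '0x1234'}

The same function covers both the XML-style dict (space separator) and the curly dict ('=' separator), which is why parse_type only differs in the separator it passes.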
data_string = data_string.strip() + # if first_run: print('========= ' + data_string) + + # dont parse if too long + if first_run and len(data_string) > 10000: + logger.warning('Skipped a too long lines with ' + str(len(data_string)) + ' characters') + return data_string + hit = Detect(data_string) final_struct = None @@ -222,12 +304,12 @@ def parse(data_string: str): data_string = data_string.replace(hit.whole_match, tag, 1) # recursion - final_struct = parse(data_string) + final_struct = parse(data_string, False) # reconstruct data structure if not final_struct: final_struct = constructed # at the root else: - resolve_tag(final_struct, tag, constructed) + final_struct = resolve_tag(final_struct, tag, constructed) return final_struct diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py index ec655e6..016ce3f 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -54,18 +54,30 @@ def fetch_node_data(self, data_tree: dict) -> bool: self.get_line() data_dict = self.node_data_to_json(node_data) - # TODO parse each value + # TODO test this + self.parse_values(data_dict) self.dict_update(data_tree, data_dict) return res - def dict_update(self, main_dict, data_dict): - data_dict_len = len(data_dict) - main_dict_len = len(main_dict) - main_dict.update(data_dict) + def parse_values(self, data_dict: dict): + for key in data_dict: + value = data_dict[key] + constructed = string_parser.parse(value) + if constructed: + data_dict[key] = constructed - if len(main_dict) != data_dict_len + main_dict_len: - logger.warning("One of the keys was already present in the json, data loss may occur") + def dict_update(self, main_dict, data_dict): + """ Redefining the dict.update function to handle key collisions """ + + for key in data_dict: + if main_dict.get(key): + if isinstance(main_dict[key], list): + main_dict[key].append(data_dict[key]) + else: + main_dict[key] = [main_dict[key], data_dict[key]] + else: + main_dict[key] = data_dict[key] def parse_title(self): if "+-o" not in self.line: @@ -82,6 +94,10 @@ def parse_title(self): return name, data + def warn_if_no_struct(self, data: str | dict | list): + if isinstance(data, str): + logger.warning("No struct found in a title, should always have one\n---> " + data) + def handle_anomalies(self, dictio: dict, data: str, key: str) -> bool: """ some values overflow on the few next lines @@ -132,13 +148,30 @@ def iterate_children(self, depth: int, data_tree: dict): while self.line and (self.line[depth] == '|' or self.line[depth: depth + 3] == '+-o'): if self.line[depth: depth + 3] == '+-o': name = self.parse_title()[0] - self.check_key_uniqueness(data_tree, name) - data_tree[name] = {} - self.recursive_call(data_tree[name]) + new_child = self.setup_new_child(data_tree, name) + self.recursive_call(new_child) else: self.get_line() + def setup_new_child(self, data_tree, key): + """ This function is dedicated to iterate_child, it handles the special cases + where a node name is already present for the same parent """ + + if data_tree.get(key): + if isinstance(data_tree[key], list): + # case already list of data nodes + data_tree[key].append({}) + else: + # case currently single data node + data_tree[key] = [data_tree[key], {}] + return data_tree[key][-1] + + else: + # case new key + data_tree[key] = {} + return data_tree[key] + def recursive_fun(self, data_tree: dict): is_leaf = False self.get_line() @@ -146,8 +179,11 @@ 
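The redefined dict_update above quietly changes a value's type on collision: the first duplicate key turns the value into a two-element list, and later duplicates append to it. A standalone sketch of that contract (merge_key is a hypothetical name):

    def merge_key(tree: dict, key: str, value):
        """First write stores the value; repeated keys promote it to a list."""
        if key not in tree:
            tree[key] = value
        elif isinstance(tree[key], list):
            tree[key].append(value)
        else:
            tree[key] = [tree[key], value]

    tree = {}
    for v in ('a', 'b', 'c'):
        merge_key(tree, 'name', v)
    # tree == {'name': ['a', 'b', 'c']}

One difference worth noting: the sketch tests membership, while the patch's main_dict.get(key) treats falsy values such as '' or {} as absent, so a duplicate of an empty value would overwrite it instead of building a list.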
def recursive_fun(self, data_tree: dict):
 
         # check if we're at the start of a node
         self.check_start_node()
 
-        additional_data = self.parse_title()[1]
-        additional_data = string_parser.parse(additional_data)
+        # try to get a struct out of the data
+        title_data = self.parse_title()[1]
+        additional_data = string_parser.parse(title_data) or title_data
+        self.warn_if_no_struct(additional_data)
+
         self.dict_update(data_tree, additional_data)
 
         depth = self.line.index('o')  # to identify the other nodes that have the same parent
diff --git a/tests/test_string_parser.py b/tests/test_string_parser.py
index f82cf02..8193f80 100644
--- a/tests/test_string_parser.py
+++ b/tests/test_string_parser.py
@@ -32,14 +32,14 @@ class TestStringParser(SysdiagnoseTestCase):
     ]
 
     expected_detect = [
-        ('key val, k2 v2', sp.DataType.XML_LIKE),
+        ('key val, k2 v2', sp.DataType.XML_DICT),
         ('li1, li2, li3, li4', sp.DataType.LIST),
-        (' k1 v1 , k2 v2, k3 v3 ,k4 v4 ', sp.DataType.XML_LIKE),
-        ('k11 v11', sp.DataType.XML_LIKE),
+        (' k1 v1 , k2 v2, k3 v3 ,k4 v4 ', sp.DataType.XML_DICT),
+        ('k11 v11', sp.DataType.XML_DICT),
         (' li 1, li 2 , li3', sp.DataType.LIST),
         ('li1 , li2 ,li3, li4 ', sp.DataType.LIST),
         ('li1 , li2 ,li3, li4 ', sp.DataType.LIST),
-        ('k11 v11,k22 v22', sp.DataType.XML_LIKE),
+        ('k11 v11,k22 v22', sp.DataType.XML_DICT),
         ('()', sp.DataType.STRING),
         ('(li111)', sp.DataType.STRING)
     ]

From 12c566ec499ab48d0dfdece31bcb96b927efd251 Mon Sep 17 00:00:00 2001
From: Aweinhof
Date: Tue, 23 Sep 2025 07:50:37 +0200
Subject: [PATCH 13/17] Added a line preparation that greatly optimizes the
 recursion depth + some cleanup

---
 .../utils/ioreg_parsers/string_parser.py | 118 +++++++++++++++---
 tests/test_string_parser.py              |   1 +
 2 files changed, 105 insertions(+), 14 deletions(-)

diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py
index 3fc928a..cec9f98 100644
--- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py
+++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py
@@ -1,5 +1,6 @@
 import re
 from enum import Enum
+import sys
 from sysdiagnose.utils.base import logger
 import uuid
 
@@ -132,6 +133,50 @@ def is_redundent_syntax_regex(s: str):
     Example : "[()]" is the same as """
     return re.search(r'^[(){}\[\]<>""]+$', s)
 
+def prepare_line(line: str) -> str:
+    """ remove unnecessary double-quotes
+    quotes are needed when a comma is inside.
+    example :
+         !=
+
+    Note : regex can't be used, this needs to be stateful, i.e.
consider opening and closing quotes + example that doesnt work with regex: "a,"b"c," + gives : '"a,bc,"' + should give : '"a,"b"c,"' + (the quotes in "a," aren't removed bcs of the comma, so "b" is detected as a string) + """ + inside = False + opening_pos = None + skipping = False + parse_char = (',', '=', '{', '}', '(', ')') + line = line.strip() + + i = 0 + while i < len(line): + if line[i] == '"': + if inside: + if not skipping: + line = line[:i] + line[i + 1:] # remove last " + line = line[:opening_pos] + line[opening_pos + 1:] # remove first " + i -= 1 + else: + i += 1 + inside = False + + else: + inside = True + opening_pos = i + skipping = False + i += 1 + continue + + if inside and line[i] in parse_char: + skipping = True + + i += 1 + + return line + def check_key_uniqueness(dictio: dict, key: str): if dictio.get(key): logger.warning('Warning : Key is already in dictionary, data may be lost\n---> ' + key) @@ -196,6 +241,7 @@ def parse_type(input_string: str, type: DataType): def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): for key in final_struct: elem = final_struct[key] + #return resolve_tag_list_dict(final_struct, elem, key, tag, constructed) if isinstance(elem, str) and tag in elem: if isinstance(constructed, str): @@ -223,6 +269,35 @@ def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): elif isinstance(elem, dict): if resolve_tag_dict(elem, tag, constructed): return True + return False + +def resolve_tag_list_dict(final_struct: list | dict, elem: list | dict | str, key: str, tag: str, constructed: dict | list | str): + if isinstance(elem, str) and tag in elem: + if isinstance(constructed, str): + final_struct[key] = final_struct[key].replace(tag, constructed) + else: + check_anomaly(elem, tag) + final_struct[key] = constructed + return True + + elif isinstance(key, str) and tag in key: # only for dict, key is int for list + if isinstance(constructed, str): + new_key = key.replace(tag, constructed) + value = final_struct[key] + del final_struct[key] + final_struct[new_key] = value + else: + logger.error("Error : Trying to use a struct as a key in a dict") + final_struct[key] = constructed + return True + + elif isinstance(elem, list): + if resolve_tag_list(elem, tag, constructed): + return True + + elif isinstance(elem, dict): + if resolve_tag_dict(elem, tag, constructed): + return True return False @@ -230,8 +305,8 @@ def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): for i in range(len(final_struct)): elem = final_struct[i] + #return resolve_tag_list_dict(final_struct, elem, i, tag, constructed) - # TODO repetition with resolve_tag_dict, put in a func if isinstance(elem, str) and tag in elem: if isinstance(constructed, str): final_struct[i] = final_struct[i].replace(tag, constructed) @@ -247,9 +322,7 @@ def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): elif isinstance(elem, dict): if resolve_tag_dict(elem, tag, constructed): return True - - return False - + return False def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | list | str): if isinstance(final_struct, dict): @@ -258,7 +331,6 @@ def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | l elif isinstance(final_struct, list): resolve_tag_list(final_struct, tag, constructed) - # TODO struct in string doesnt work, for example () elif isinstance(final_struct, str): if not 
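prepare_line's quote scanner is stateful on purpose, as its docstring explains. A few worked cases make the intended behaviour concrete (expected results assume the implementation above, where a quoted span keeps its quotes only if it contains one of ',', '=', '{', '}', '(' or ')'):

    prepare_line('"simple" data')   # -> 'simple data'     harmless quotes dropped
    prepare_line('"a,b" data')      # -> '"a,b" data'      comma inside: quotes kept
    prepare_line('"a,"b"c,"')       # -> '"a,"b"c,"'       the stateful case from the docstring

Dropping the harmless quotes up front is what shrinks the recursion depth later: every quoted token would otherwise be detected as a STRING structure and cost one recursion level of its own.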
isinstance(constructed, str): if final_struct.replace(tag, "") == '()': @@ -280,15 +352,10 @@ def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | l return final_struct -def parse(data_string: str, first_run: bool = True): - data_string = data_string.strip() - # if first_run: print('========= ' + data_string) - - # dont parse if too long - if first_run and len(data_string) > 10000: - logger.warning('Skipped a too long lines with ' + str(len(data_string)) + ' characters') - return data_string +def parse_main_loop(data_string: str, depth: dict): + depth['value'] += 1 + # Detection hit = Detect(data_string) final_struct = None @@ -304,7 +371,7 @@ def parse(data_string: str, first_run: bool = True): data_string = data_string.replace(hit.whole_match, tag, 1) # recursion - final_struct = parse(data_string, False) + final_struct = parse_main_loop(data_string, depth) # reconstruct data structure if not final_struct: @@ -313,3 +380,26 @@ def parse(data_string: str, first_run: bool = True): final_struct = resolve_tag(final_struct, tag, constructed) return final_struct + +def parse(data_string: str): + # make it a struct so it is passed by reference + depth = {'value': 0} + + # increase recursion depth, default is at 1000 + sys.setrecursionlimit(3000) + + # greatly reduce recursion depth i.e. 80 000+ chars parsed against max 10 000 chars before + data_string = prepare_line(data_string) + + try: + data_string = parse_main_loop(data_string, depth) + except RecursionError: + logger.warning("Skipped line with " + str(len(data_string)) + " characters. " + "Recursion depth : " + str(depth['value']) + "\n" + "--> max recursion depth can be increased in utils/string_parser.py" + " in parse(). Feel free to try as high as needed to parse this line.") + + return data_string + + +print(parse('')) diff --git a/tests/test_string_parser.py b/tests/test_string_parser.py index 8193f80..11f123b 100644 --- a/tests/test_string_parser.py +++ b/tests/test_string_parser.py @@ -53,6 +53,7 @@ def test_detect(self): def test_parsing(self): for test_val, expected in zip(self.test_list, self.expected_parsed): result = sp.parse(test_val) + print(result) self.assertTrue(result == expected) From 58a0524e8ac47e6099914ea5394a030820b00556 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Tue, 23 Sep 2025 13:40:00 +0200 Subject: [PATCH 14/17] Cleaned code releated to ioreg and stringparsers --- src/sysdiagnose/parsers/ioacpiplane.py | 31 +++++ src/sysdiagnose/parsers/iodevicetree.py | 32 +++++ src/sysdiagnose/parsers/iofirewire.py | 32 +++++ src/sysdiagnose/parsers/iopower.py | 32 +++++ src/sysdiagnose/parsers/iousb.py | 32 +++++ .../utils/ioreg_parsers/string_parser.py | 119 ++++++------------ .../utils/ioreg_parsers/structure_parser.py | 10 +- tests/test_string_parser.py | 3 +- 8 files changed, 204 insertions(+), 87 deletions(-) create mode 100644 src/sysdiagnose/parsers/ioacpiplane.py create mode 100644 src/sysdiagnose/parsers/iodevicetree.py create mode 100644 src/sysdiagnose/parsers/iofirewire.py create mode 100644 src/sysdiagnose/parsers/iopower.py create mode 100644 src/sysdiagnose/parsers/iousb.py diff --git a/src/sysdiagnose/parsers/ioacpiplane.py b/src/sysdiagnose/parsers/ioacpiplane.py new file mode 100644 index 0000000..18f6d75 --- /dev/null +++ b/src/sysdiagnose/parsers/ioacpiplane.py @@ -0,0 +1,31 @@ +#! 
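Raising the interpreter-wide recursion limit works, but parse_main_loop only ever recurses on a strictly shorter string and does all of its work either before the recursive call or immediately after it, so the recursion can also be unrolled into a loop. A possible iterative shape, offered as a sketch rather than what the patch does, reusing Detect, parse_type, generate_tag and resolve_tag exactly as defined above:

    def parse_iterative(data_string: str):
        pending = []  # (tag, constructed) pairs in detection order
        hit = Detect(data_string)
        while hit.found:
            constructed = parse_type(hit.content, hit.type)
            tag = generate_tag()
            data_string = data_string.replace(hit.whole_match, tag, 1)
            pending.append((tag, constructed))
            hit = Detect(data_string)

        # fully tagged residue, matching the recursive stop case
        final_struct = data_string
        # resolve in reverse, mirroring the unwind of the recursion
        for tag, constructed in reversed(pending):
            final_struct = resolve_tag(final_struct, tag, constructed)
        return final_struct

That shape would retire both sys.setrecursionlimit and the depth counter, and no line would ever have to be skipped with a RecursionError warning.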
/usr/bin/env python3 + +import os +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser + + +class IOACPIPlaneParser(BaseParserInterface): + description = "IOACPIPlane.txt file parser" + format = "json" + + def __init__(self, config: SysdiagnoseConfig, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_file = "ioreg/IOACPIPlane.txt" + return [os.path.join(self.case_data_subfolder, log_file)] + + def execute(self) -> list | dict: + log_file = self.get_log_files()[0] + data_tree = {} + + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + p = IORegStructParser() + data_tree = p.parse(log_file) + + except Exception: + logger.exception("IOACPIPlane parsing crashed") + + return data_tree diff --git a/src/sysdiagnose/parsers/iodevicetree.py b/src/sysdiagnose/parsers/iodevicetree.py new file mode 100644 index 0000000..b803adb --- /dev/null +++ b/src/sysdiagnose/parsers/iodevicetree.py @@ -0,0 +1,32 @@ +#! /usr/bin/env python3 + +import os +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser + + +class IODeviceTreeParser(BaseParserInterface): + description = "IODeviceTree.txt file parser" + format = "json" + + def __init__(self, config: SysdiagnoseConfig, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_file = "ioreg/IODeviceTree.txt" + return [os.path.join(self.case_data_subfolder, log_file)] + + def execute(self) -> list | dict: + log_files = self.get_log_files() + data_tree = {} + + for log_file in log_files: + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + p = IORegStructParser() + data_tree = p.parse(log_file) + + except Exception: + logger.exception("IODeviceTree parsing crashed") + + return data_tree diff --git a/src/sysdiagnose/parsers/iofirewire.py b/src/sysdiagnose/parsers/iofirewire.py new file mode 100644 index 0000000..639f2e2 --- /dev/null +++ b/src/sysdiagnose/parsers/iofirewire.py @@ -0,0 +1,32 @@ +#! /usr/bin/env python3 + +import os +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser + + +class IOFireWireParser(BaseParserInterface): + description = "IOFireWire.txt file parser" + format = "json" + + def __init__(self, config: SysdiagnoseConfig, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_file = "ioreg/IOFireWire.txt" + return [os.path.join(self.case_data_subfolder, log_file)] + + def execute(self) -> list | dict: + log_files = self.get_log_files() + data_tree = {} + + for log_file in log_files: + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + p = IORegStructParser() + data_tree = p.parse(log_file) + + except Exception: + logger.exception("IOFireWire parsing crashed") + + return data_tree diff --git a/src/sysdiagnose/parsers/iopower.py b/src/sysdiagnose/parsers/iopower.py new file mode 100644 index 0000000..08a9087 --- /dev/null +++ b/src/sysdiagnose/parsers/iopower.py @@ -0,0 +1,32 @@ +#! 
/usr/bin/env python3 + +import os +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser + + +class IOPowerParser(BaseParserInterface): + description = "IOPower.txt file parser" + format = "json" + + def __init__(self, config: SysdiagnoseConfig, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_file = "ioreg/IOPower.txt" + return [os.path.join(self.case_data_subfolder, log_file)] + + def execute(self) -> list | dict: + log_files = self.get_log_files() + data_tree = {} + + for log_file in log_files: + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + p = IORegStructParser() + data_tree = p.parse(log_file) + + except Exception: + logger.exception("IOPower parsing crashed") + + return data_tree diff --git a/src/sysdiagnose/parsers/iousb.py b/src/sysdiagnose/parsers/iousb.py new file mode 100644 index 0000000..30688d2 --- /dev/null +++ b/src/sysdiagnose/parsers/iousb.py @@ -0,0 +1,32 @@ +#! /usr/bin/env python3 + +import os +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser + + +class IOServiceParser(BaseParserInterface): + description = "IOUSB.txt file parser" + format = "json" + + def __init__(self, config: SysdiagnoseConfig, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_file = "ioreg/IOUSB.txt" + return [os.path.join(self.case_data_subfolder, log_file)] + + def execute(self) -> list | dict: + log_files = self.get_log_files() + data_tree = {} + + for log_file in log_files: + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + p = IORegStructParser() + data_tree = p.parse(log_file) + + except Exception: + logger.exception("IOUSB parsing crashed") + + return data_tree diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index cec9f98..fd97b22 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -29,7 +29,8 @@ def detect_type(self, input: str): CURLY_DICT : like xml_dict but with {} instead of <> - LIST : data in parentheses with at least one comma + LIST : data in parentheses ('[]', '()') or d-quotes with at least one comma + Note : most of basic d-quotes have been sinitized in prepare_data() STRING : parentheses that dont contain any comma. 
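The five parser modules added in this patch are identical apart from the plane name, the class name and the log file path. If more ioreg planes turn up, the boilerplate could be generated once; a sketch under that assumption (make_ioreg_parser is a hypothetical helper, and it is untested whether the framework's parser discovery accepts classes sharing one __file__, which may be exactly why the patch keeps separate modules):

    def make_ioreg_parser(plane: str):
        """Build a BaseParserInterface subclass for one ioreg plane file."""
        class _IORegPlaneParser(BaseParserInterface):
            description = f"{plane}.txt file parser"
            format = "json"

            def __init__(self, config: SysdiagnoseConfig, case_id: str):
                super().__init__(__file__, config, case_id)

            def get_log_files(self) -> list:
                return [os.path.join(self.case_data_subfolder, f"ioreg/{plane}.txt")]

            def execute(self) -> list | dict:
                data_tree = {}
                for log_file in self.get_log_files():
                    try:
                        logger.info(f"Processing file {log_file}", extra={'log_file': log_file})
                        data_tree = IORegStructParser().parse(log_file)
                    except Exception:
                        logger.exception(f"{plane} parsing crashed")
                return data_tree

        _IORegPlaneParser.__name__ = f"{plane}Parser"
        return _IORegPlaneParser

    IOUSBParser = make_ioreg_parser("IOUSB")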
example : I'm good at coding (not really) <-- shouldn't be a list, simply text @@ -62,7 +63,7 @@ def detect_type(self, input: str): self.assign_best(hit, DataType.STRING) # find simple double-quotes ex : "hello world" - hit = re.search(r'("[^"]*")', input) + hit = re.search(r'"([^"]*)"', input) if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.STRING) @@ -75,7 +76,7 @@ def assign_best(self, hit: re.Match, type: DataType): self._best_content = hit.group(1) self._found = True - def find_smallest(self, regex: str, data: str): + def find_smallest(self, regex: str, data: str) -> re.Match: pattern = re.compile(regex) matches = list(pattern.finditer(data)) if not matches: @@ -128,7 +129,7 @@ def check_anomaly(s: str, tag: str): logger.warning("Warning : Anomaly: some data was right next to " "the struct (without space), this data is thus lost\n---> " + structured) -def is_redundent_syntax_regex(s: str): +def is_redundent_syntax_regex(s: str) -> re.Match: """ If we have for example ([ ]) around a struct, we consider it useless Example : "[()]" is the same as """ return re.search(r'^[(){}\[\]<>""]+$', s) @@ -219,7 +220,7 @@ def parse_dict(input_string: str, separator: str) -> dict: return res -def parse_type(input_string: str, type: DataType): +def parse_type(input_string: str, type: DataType) -> dict | list | str: match type: case DataType.XML_DICT: return parse_dict(input_string, ' ') @@ -237,41 +238,7 @@ def parse_type(input_string: str, type: DataType): logger.error("Error : Type not found in parse_type(). (Note : " "you probably forgot to add it to the match case)") - -def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): - for key in final_struct: - elem = final_struct[key] - #return resolve_tag_list_dict(final_struct, elem, key, tag, constructed) - - if isinstance(elem, str) and tag in elem: - if isinstance(constructed, str): - final_struct[key] = final_struct[key].replace(tag, constructed) - else: - check_anomaly(elem, tag) - final_struct[key] = constructed - return True - - elif isinstance(key, str) and tag in key: - if isinstance(constructed, str): - new_key = key.replace(tag, constructed) - value = final_struct[key] - del final_struct[key] - final_struct[new_key] = value - else: - logger.error("Error : Trying to use a struct as a key in a dict") - final_struct[key] = constructed - return True - - elif isinstance(elem, list): - if resolve_tag_list(elem, tag, constructed): - return True - - elif isinstance(elem, dict): - if resolve_tag_dict(elem, tag, constructed): - return True - return False - -def resolve_tag_list_dict(final_struct: list | dict, elem: list | dict | str, key: str, tag: str, constructed: dict | list | str): +def resolve_tag_list_dict(final_struct: list | dict, elem: list | dict | str, key: str, tag: str, constructed: dict | list | str) -> bool: if isinstance(elem, str) and tag in elem: if isinstance(constructed, str): final_struct[key] = final_struct[key].replace(tag, constructed) @@ -301,30 +268,38 @@ def resolve_tag_list_dict(final_struct: list | dict, elem: list | dict | str, ke return False +def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list | str) -> bool: + for key in final_struct: + elem = final_struct[key] + if resolve_tag_list_dict(final_struct, elem, key, tag, constructed): + return True -def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): + return False + +def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list | str): for i in 
range(len(final_struct)): elem = final_struct[i] - #return resolve_tag_list_dict(final_struct, elem, i, tag, constructed) - - if isinstance(elem, str) and tag in elem: - if isinstance(constructed, str): - final_struct[i] = final_struct[i].replace(tag, constructed) - else: - check_anomaly(elem, tag) - final_struct[i] = constructed + if resolve_tag_list_dict(final_struct, elem, i, tag, constructed): return True - elif isinstance(elem, list): - if resolve_tag_list(elem, tag, constructed): - return True + return False + +def resolve_tag_str(final_struct: dict | list | str, tag: str, constructed: dict | list | str) -> dict | list | str: + if not isinstance(constructed, str): + if final_struct.replace(tag, "") == '()': + final_struct = constructed + else: + user_friendly = final_struct.replace(tag, "[STRUCT]") + lost_data = final_struct.replace(tag, "") + if not is_redundent_syntax_regex(lost_data) and lost_data: + logger.warning("Warning : trying to incorporate dict/list in a string :\n---> " + user_friendly) + final_struct = constructed + else: + final_struct = final_struct.replace(tag, constructed) - elif isinstance(elem, dict): - if resolve_tag_dict(elem, tag, constructed): - return True - return False + return final_struct -def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | list | str): +def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | list | str) -> dict | list | str: if isinstance(final_struct, dict): resolve_tag_dict(final_struct, tag, constructed) @@ -332,27 +307,17 @@ def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | l resolve_tag_list(final_struct, tag, constructed) elif isinstance(final_struct, str): - if not isinstance(constructed, str): - if final_struct.replace(tag, "") == '()': - final_struct = constructed - else: - user_friendly = final_struct.replace(tag, "[STRUCT]") - lost_data = final_struct.replace(tag, "") - if not is_redundent_syntax_regex(lost_data): - logger.warning("Warning : trying to incorporate dict/list in a string :\n---> " + user_friendly) - final_struct = constructed - else: - final_struct = final_struct.replace(tag, constructed) + final_struct = resolve_tag_str(final_struct, tag, constructed) else: logger.error('Error : struct type not found') exit(1) - # return is necessary bcs strings are not passed by reference in python + # return is necessary, strings are not passed by reference in python return final_struct -def parse_main_loop(data_string: str, depth: dict): +def parse_main_loop(data_string: str, depth: dict) -> dict | list | str: depth['value'] += 1 # Detection @@ -361,7 +326,7 @@ def parse_main_loop(data_string: str, depth: dict): # recursion stop if not hit.found: - return None + return data_string # form basic struct constructed = parse_type(hit.content, hit.type) @@ -374,14 +339,11 @@ def parse_main_loop(data_string: str, depth: dict): final_struct = parse_main_loop(data_string, depth) # reconstruct data structure - if not final_struct: - final_struct = constructed # at the root - else: - final_struct = resolve_tag(final_struct, tag, constructed) + final_struct = resolve_tag(final_struct, tag, constructed) return final_struct -def parse(data_string: str): +def parse(data_string: str) -> dict | list | str: # make it a struct so it is passed by reference depth = {'value': 0} @@ -392,7 +354,7 @@ def parse(data_string: str): data_string = prepare_line(data_string) try: - data_string = parse_main_loop(data_string, depth) + data_string = parse_main_loop(data_string, 
depth) or data_string except RecursionError: logger.warning("Skipped line with " + str(len(data_string)) + " characters. " "Recursion depth : " + str(depth['value']) + "\n" @@ -400,6 +362,3 @@ def parse(data_string: str): " in parse(). Feel free to try as high as needed to parse this line.") return data_string - - -print(parse('')) diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py index 016ce3f..b9acf48 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -67,7 +67,7 @@ def parse_values(self, data_dict: dict): if constructed: data_dict[key] = constructed - def dict_update(self, main_dict, data_dict): + def dict_update(self, main_dict: dict, data_dict: dict): """ Redefining the dict.update function to handle key collisions """ for key in data_dict: @@ -79,10 +79,10 @@ def dict_update(self, main_dict, data_dict): else: main_dict[key] = data_dict[key] - def parse_title(self): + def parse_title(self) -> tuple: if "+-o" not in self.line: logger.warning("'non-title' line given to title parser, should not happen") - return "" + return "", "" whole_title = self.line.split("+-o", 1)[1].strip() @@ -154,8 +154,8 @@ def iterate_children(self, depth: int, data_tree: dict): else: self.get_line() - def setup_new_child(self, data_tree, key): - """ This function is dedicated to iterate_child, it handles the special cases + def setup_new_child(self, data_tree: dict, key: str) -> dict: + """ This function is dedicated to iterate_children, it handles the special cases where a node name is already present for the same parent """ if data_tree.get(key): diff --git a/tests/test_string_parser.py b/tests/test_string_parser.py index 11f123b..6805a73 100644 --- a/tests/test_string_parser.py +++ b/tests/test_string_parser.py @@ -38,7 +38,7 @@ class TestStringParser(SysdiagnoseTestCase): ('k11 v11', sp.DataType.XML_DICT), (' li 1, li 2 , li3', sp.DataType.LIST), ('li1 , li2 ,li3, li4 ', sp.DataType.LIST), - ('li1 , li2 ,li3, li4 ', sp.DataType.LIST), + ('li11, li22', sp.DataType.LIST), ('k11 v11,k22 v22', sp.DataType.XML_DICT), ('()', sp.DataType.STRING), ('(li111)', sp.DataType.STRING) @@ -53,7 +53,6 @@ def test_detect(self): def test_parsing(self): for test_val, expected in zip(self.test_list, self.expected_parsed): result = sp.parse(test_val) - print(result) self.assertTrue(result == expected) From 4394482b601648ca8fcf4968d11b0f02570c5059 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Wed, 24 Sep 2025 10:14:37 +0200 Subject: [PATCH 15/17] bug fix concerning strings inside <> and cleanup --- .../utils/ioreg_parsers/string_parser.py | 9 +- .../utils/ioreg_parsers/structure_parser.py | 1 - tests/test_parsers_ioservice.py | 332 ++++++++---------- 3 files changed, 146 insertions(+), 196 deletions(-) diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index fd97b22..4ec81e5 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -24,7 +24,7 @@ def __init__(self, input_string: str): def detect_type(self, input: str): """ Note on the match types - XML_DICT : data inside <> + XML_DICT : data inside <> with at least a comma or space between chars excluded : <> , < > , < > CURLY_DICT : like xml_dict but with {} instead of <> @@ -38,7 +38,7 @@ def detect_type(self, input: str): """ # noqa: W605 # find xml like dict ex : - hit = 
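The regex tightening in the hunk below is the advertised bug fix: a hex blob like <648a4c> contains neither a comma nor an inner space, so it must not be parsed as a one-key dictionary. A quick classification check against the two patterns introduced here (taken verbatim from the diff):

    import re

    XML_DICT = r'<([^<>]*([,]|[^\s<>][\s]+[^\s<>])[^<>]*)>'
    SIMPLE   = r'(<[^,<>\s]*>)'

    re.search(XML_DICT, '<key val, k2 v2>')  # match: comma and spaced tokens, dict-like
    re.search(XML_DICT, '<648a4c>')          # None: a single token is not a dict
    re.search(SIMPLE,   '<648a4c>')          # match: kept as a plain STRING payload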
self.find_smallest(r'<([^<>]*[^\s<>][^<>]*)>', input) + hit = self.find_smallest(r'<([^<>]*([,]|[^\s<>][\s]+[^\s<>])[^<>]*)>', input) if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.XML_DICT) @@ -52,6 +52,11 @@ def detect_type(self, input: str): if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.LIST) + # find simple string data in <> ex : <648a4c> + hit = re.search(r'(<[^,<>\s]*>)', input) + if hit and len(hit.group(0)) < self._best_len: + self.assign_best(hit, DataType.STRING) + # find simple parentheses without ',' ex : (hello world) hit = re.search(r'(\([^,)(]*\))', input) if hit and len(hit.group(0)) < self._best_len: diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py index b9acf48..6e4a292 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -54,7 +54,6 @@ def fetch_node_data(self, data_tree: dict) -> bool: self.get_line() data_dict = self.node_data_to_json(node_data) - # TODO test this self.parse_values(data_dict) self.dict_update(data_tree, data_dict) diff --git a/tests/test_parsers_ioservice.py b/tests/test_parsers_ioservice.py index b402aca..c0d0f1e 100644 --- a/tests/test_parsers_ioservice.py +++ b/tests/test_parsers_ioservice.py @@ -1,4 +1,4 @@ -from sysdiagnose.parsers.ioservice import IOServiceParser +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser from tests import SysdiagnoseTestCase import unittest import io @@ -7,48 +7,47 @@ class TestParsersIOService(SysdiagnoseTestCase): def test_basic_structure(self): - for case_id, _ in self.sd.cases().items(): - p = IOServiceParser(self.sd.config, case_id=case_id) + p = IORegStructParser() # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') - start_file = io.BytesIO(b"""+-o Root node + start_file = io.StringIO("""+-o Root node | { | "data 1" = "value 1" | "data 2" = "value 2" | } | - +-o Node 2 + +-o Node 2 | { | "#address-cells" = <02000000> | "AAPL,phandle" = <01000000> | } | - +-o Node 3 + +-o Node 3 | | { | | "data 31" = "value 31" | | "data 32" = "value 32" | | } | | - | +-o Leaf 1 + | +-o Leaf 1 | | { | | "data l1" = "value l1" | | "data l2" = "value l2" | | } | | - | +-o Leaf 2 + | +-o Leaf 2 | { | "data l3" = "value l3" | "data l4" = "value l4" | } | - +-o Leaf 3 + +-o Leaf 3 | { | "data l5" = "value L5" | "data l6" = "value l6" | } | - +-o Leaf 4 + +-o Leaf 4 { "data 51" = "value 51" "data 52" = "value 52" @@ -57,63 +56,46 @@ def test_basic_structure(self): """) # noqa: W291, W293 expected = { - "Children": [ - { - "Children": [ - { - "Children": [ - { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" - }, - { - "Children": [], - "Data": { - "data l3": "\"value l3\"", - "data l4": "\"value l4\"" - }, - "Name": "Leaf 2" - } - ], - "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" - }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" - }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" - }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', 
+ 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' }, - "Name": "Node 2" + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value l4' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' } - ], - "Data": { - "data 1": "\"value 1\"", - "data 2": "\"value 2\"" - }, - "Name": "Root node" + } } p.open_file = start_file @@ -123,36 +105,35 @@ def test_basic_structure(self): self.assertTrue(result == expected) def test_value_overflow_anomaly(self): - for case_id, _ in self.sd.cases().items(): - p = IOServiceParser(self.sd.config, case_id=case_id) + p = IORegStructParser() # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') - start_file = io.BytesIO(b"""+-o Root node + start_file = io.StringIO("""+-o Root node | { | "data 1" = "value 1" | "data 2" = "value 2" | } | - +-o Node 2 + +-o Node 2 | { | "#address-cells" = <02000000> | "AAPL,phandle" = <01000000> | } | - +-o Node 3 + +-o Node 3 | | { | | "data 31" = "value 31" | | "data 32" = "value 32" | | } | | - | +-o Leaf 1 + | +-o Leaf 1 | | { | | "data l1" = "value l1" | | "data l2" = "value l2" | | } | | - | +-o Leaf 2 + | +-o Leaf 2 | { | "data l3" = "value l3" | "data l4" = "value aaaa @@ -162,13 +143,13 @@ def test_value_overflow_anomaly(self): " | } | - +-o Leaf 3 + +-o Leaf 3 | { | "data l5" = "value L5" | "data l6" = "value l6" | } | - +-o Leaf 4 + +-o Leaf 4 { "data 51" = "value 51" "data 52" = "value 52" @@ -177,63 +158,46 @@ def test_value_overflow_anomaly(self): """) # noqa: W291, W293 expected = { - "Children": [ - { - "Children": [ - { - "Children": [ - { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" - }, - { - "Children": [], - "Data": { - "data l3": "\"value l3\"", - "data l4": "\"value aaaabbbbccccdddd\"" - }, - "Name": "Leaf 2" - } - ], - "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" - }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" - }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" - }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' }, - "Name": "Node 2" + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' } - ], - "Data": { - "data 1": "\"value 1\"", - "data 
2": "\"value 2\"" - }, - "Name": "Root node" + } } p.open_file = start_file @@ -243,48 +207,47 @@ def test_value_overflow_anomaly(self): self.assertTrue(result == expected) def test_non_ascii_byte_anomaly(self): - for case_id, _ in self.sd.cases().items(): - p = IOServiceParser(self.sd.config, case_id=case_id) + p = IORegStructParser() # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') - start_file = io.BytesIO(b"""+-o Root node + start_file = io.StringIO("""+-o Root node | { | "data 1" = "value 1" | "data 2" = "value 2" | } | - +-o Node 2 + +-o Node 2 | { | "#address-cells" = <02000000> | "AAPL,phandle" = <01000000> | } | - +-o Node 3 + +-o Node 3 | | { | | "data 31" = "value 31" | | "data 32" = "value 32" | | } | | - | +-o Leaf 1 + | +-o Leaf 1 | | { | | "data l1" = "value l1" | | "data l2" = "value l2" | | } | | - | +-o Leaf 2 + | +-o Leaf 2 | { | "data l3" = "value l3" | "data l4" = "value -->\xbf<--" | } | - +-o Leaf 3 + +-o Leaf 3 | { | "data l5" = "value L5" | "data l6" = "value l6" | } | - +-o Leaf 4 + +-o Leaf 4 { "data 51" = "value 51" "data 52" = "value 52" @@ -293,63 +256,46 @@ def test_non_ascii_byte_anomaly(self): """) # noqa: W291, W293 expected = { - "Children": [ - { - "Children": [ - { - "Children": [ - { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" - }, - { - "Children": [], - "Data": { - "data l3": "\"value l3\"", - "data l4": "\"value -->?<--\"" - }, - "Name": "Leaf 2" - } - ], - "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" - }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" - }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" - }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' }, - "Name": "Node 2" + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' } - ], - "Data": { - "data 1": "\"value 1\"", - "data 2": "\"value 2\"" - }, - "Name": "Root node" + } } p.open_file = start_file From e5a9ebaea876ea63d4444668f04acf398f247421 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Thu, 25 Sep 2025 13:41:47 +0200 Subject: [PATCH 16/17] dissociated ioreg parsers + small fixes --- src/sysdiagnose/parsers/iousb.py | 2 +- tests/test_parsers_ioacpiplane.py | 320 +++++++++++++++++++++++++++++ tests/test_parsers_iodevicetree.py | 320 +++++++++++++++++++++++++++++ tests/test_parsers_iofirewire.py | 320 +++++++++++++++++++++++++++++ tests/test_parsers_iopower.py | 320 +++++++++++++++++++++++++++++ tests/test_parsers_ioservice.py | 11 + tests/test_parsers_iousb.py | 320 +++++++++++++++++++++++++++++ 7 files changed, 1612 insertions(+), 1 deletion(-) create mode 100644 
tests/test_parsers_ioacpiplane.py create mode 100644 tests/test_parsers_iodevicetree.py create mode 100644 tests/test_parsers_iofirewire.py create mode 100644 tests/test_parsers_iopower.py create mode 100644 tests/test_parsers_iousb.py diff --git a/src/sysdiagnose/parsers/iousb.py b/src/sysdiagnose/parsers/iousb.py index 30688d2..1b1739a 100644 --- a/src/sysdiagnose/parsers/iousb.py +++ b/src/sysdiagnose/parsers/iousb.py @@ -5,7 +5,7 @@ from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser -class IOServiceParser(BaseParserInterface): +class IOUSBParser(BaseParserInterface): description = "IOUSB.txt file parser" format = "json" diff --git a/tests/test_parsers_ioacpiplane.py b/tests/test_parsers_ioacpiplane.py new file mode 100644 index 0000000..e2ebb92 --- /dev/null +++ b/tests/test_parsers_ioacpiplane.py @@ -0,0 +1,320 @@ +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser +from sysdiagnose.parsers.ioacpiplane import IOACPIPlaneParser +from tests import SysdiagnoseTestCase +import unittest +import io +import os + + +class TestParsersIOACPIPlane(SysdiagnoseTestCase): + + def test_parse_case(self): + for case_id, case in self.sd.cases().items(): + p = IOACPIPlaneParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + def test_basic_structure(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value l4" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value l4' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_value_overflow_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o 
Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_parsers_iodevicetree.py b/tests/test_parsers_iodevicetree.py new file mode 100644 index 0000000..ee1e560 --- /dev/null +++ b/tests/test_parsers_iodevicetree.py @@ -0,0 +1,320 @@ +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser +from sysdiagnose.parsers.iodevicetree import IODeviceTreeParser +from tests import SysdiagnoseTestCase 
+import unittest +import io +import os + + +class TestParsersIODeviceTree(SysdiagnoseTestCase): + + def test_parse_case(self): + for case_id, case in self.sd.cases().items(): + p = IODeviceTreeParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + def test_basic_structure(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value l4" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value l4' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_value_overflow_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 
'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_parsers_iofirewire.py b/tests/test_parsers_iofirewire.py new file mode 100644 index 0000000..bb49df8 --- /dev/null +++ b/tests/test_parsers_iofirewire.py @@ -0,0 +1,320 @@ +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser +from sysdiagnose.parsers.iofirewire import IOFireWireParser +from tests import SysdiagnoseTestCase +import unittest +import io +import os + + +class TestParsersIOFireWire(SysdiagnoseTestCase): + + def test_parse_case(self): + for case_id, case in self.sd.cases().items(): + p = IOFireWireParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + def test_basic_structure(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value l4" + | } + | + +-o Leaf 3 + | { 
+ | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value l4' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_value_overflow_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" 
= "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_parsers_iopower.py b/tests/test_parsers_iopower.py new file mode 100644 index 0000000..7bd0406 --- /dev/null +++ b/tests/test_parsers_iopower.py @@ -0,0 +1,320 @@ +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser +from sysdiagnose.parsers.iopower import IOPowerParser +from tests import SysdiagnoseTestCase +import unittest +import io +import os + + +class TestParsersIOPower(SysdiagnoseTestCase): + + def test_parse_case(self): + for case_id, case in self.sd.cases().items(): + p = IOPowerParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + def test_basic_structure(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value l4" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value l4' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_value_overflow_anomaly(self): + 
p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/tests/test_parsers_ioservice.py b/tests/test_parsers_ioservice.py
index c0d0f1e..ab54678 100644
--- a/tests/test_parsers_ioservice.py
+++ b/tests/test_parsers_ioservice.py
@@ -1,11 +1,22 @@
 from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser
+from sysdiagnose.parsers.ioservice import IOServiceParser
 from tests import SysdiagnoseTestCase
 import unittest
 import io
+import os
 
 
 class TestParsersIOService(SysdiagnoseTestCase):
 
+    def test_parse_case(self):
+        for case_id, case in self.sd.cases().items():
+            p = IOServiceParser(self.sd.config, case_id=case_id)
+            files = p.get_log_files()
+            self.assertTrue(len(files) > 0)
+
+            p.save_result(force=True)
+            self.assertTrue(os.path.isfile(p.output_file))
+
     def test_basic_structure(self):
         p = IORegStructParser()
 
diff --git a/tests/test_parsers_iousb.py b/tests/test_parsers_iousb.py
new file mode 100644
index 0000000..a58e625
--- /dev/null
+++ b/tests/test_parsers_iousb.py
@@ -0,0 +1,320 @@
+from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser
+from sysdiagnose.parsers.iousb import IOUSBParser
+from tests import SysdiagnoseTestCase
+import unittest
+import io
+import os
+
+
+class TestParsersIOUSB(SysdiagnoseTestCase):
+
+    def test_parse_case(self):
+        for case_id, case in self.sd.cases().items():
+            p = IOUSBParser(self.sd.config, case_id=case_id)
+            files = p.get_log_files()
+            self.assertTrue(len(files) > 0)
+
+            p.save_result(force=True)
+            self.assertTrue(os.path.isfile(p.output_file))
+
+    def test_basic_structure(self):
+        p = IORegStructParser()
+
+        # careful, spaces and structure is important
+        # This simulates an open file object, as if we opened it with open(path, 'r')
+        start_file = io.StringIO("""+-o Root node
+  | {
+  |   "data 1" = "value 1"
+  |   "data 2" = "value 2"
+  | }
+  | 
+  +-o Node 2
+    | {
+    |   "#address-cells" = <02000000>
+    |   "AAPL,phandle" = <01000000>
+    | }
+    | 
+    +-o Node 3
+    | | {
+    | |   "data 31" = "value 31"
+    | |   "data 32" = "value 32"
+    | | }
+    | | 
+    | +-o Leaf 1
+    | | {
+    | |   "data l1" = "value l1"
+    | |   "data l2" = "value l2"
+    | | }
+    | | 
+    | +-o Leaf 2
+    |   {
+    |     "data l3" = "value l3"
+    |     "data l4" = "value l4"
+    |   }
+    | 
+    +-o Leaf 3
+    | {
+    |   "data l5" = "value L5"
+    |   "data l6" = "value l6"
+    | }
+    | 
+    +-o Leaf 4
+      {
+        "data 51" = "value 51"
+        "data 52" = "value 52"
+      }
+
+""") # noqa: W291, W293
+
+        expected = {
+            'class': 'test1',
+            'key1': 'val1',
+            'data 1': 'value 1',
+            'data 2': 'value 2',
+            'Node 2': {
+                'class': 'test2',
+                'key2': 'val2',
+                '#address-cells': '<02000000>',
+                'AAPL,phandle': '<01000000>',
+                'Node 3': {
+                    'class': 'test3',
+                    'key3': 'val3',
+                    'data 31': 'value 31',
+                    'data 32': 'value 32',
+                    'Leaf 1': {
+                        'class': 'test11',
+                        'key11': 'val11',
+                        'data l1': 'value l1',
+                        'data l2': 'value l2'
+                    },
+                    'Leaf 2': {
+                        'class': 'test22',
+                        'key22': 'val22',
+                        'data l3': 'value l3',
+                        'data l4': 'value l4'
+                    }
+                },
+                'Leaf 3': {
+                    'class': 'test33',
+                    'key33': 'val33',
+                    'data l5': 'value L5',
+                    'data l6': 'value l6'
+                },
+                'Leaf 4': {
+                    'class': 'test44',
+                    'key44': 'val44',
+                    'data 51': 'value 51',
+                    'data 52': 'value 52'
+                }
+            }
+        }
+
+        p.open_file = start_file
+        result = {}
+        p.recursive_fun(result)
+
+        self.assertTrue(result == expected)
+
+    def test_value_overflow_anomaly(self):
+        p = IORegStructParser()
+
+        # careful, spaces and structure is important
+        # This simulates an open file object, as if we opened it with open(path, 'r')
+        start_file = io.StringIO("""+-o Root node
+  | {
+  |   "data 1" = "value 1"
+  |   "data 2" = "value 2"
+  | }
+  | 
+  +-o Node 2
+    |
{ + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() From eea716a99ff76430751303044d423889e42c04e3 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Thu, 25 Sep 2025 17:56:44 +0200 Subject: [PATCH 17/17] Replaced exit calls, renamed class attributes and moved string_parser.py to utils --- .../utils/ioreg_parsers/structure_parser.py | 46 ++++++++----------- 
.../{ioreg_parsers => }/string_parser.py      |  2 +-
 tests/test_string_parser.py                   |  2 +-
 3 files changed, 21 insertions(+), 29 deletions(-)
 rename src/sysdiagnose/utils/{ioreg_parsers => }/string_parser.py (99%)

diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py
index 6e4a292..a174de3 100644
--- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py
+++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py
@@ -1,10 +1,10 @@
 from sysdiagnose.utils.base import logger
-from sysdiagnose.utils.ioreg_parsers import string_parser
+from sysdiagnose.utils import string_parser
 import re
 
 
 class IORegStructParser:
-    rollback_addr = None
-    line = None
+    __rollback_addr = None
+    __curr_line = None
 
     def __init__(self):
         pass
@@ -19,23 +19,18 @@ def parse(self, file_path):
         return data_tree
 
     def get_line(self):
-        self.rollback_addr = self.open_file.tell()
-        self.line = self.open_file.readline()
-        self.line = self.line.replace('\n', '')
+        self.__rollback_addr = self.open_file.tell()
+        self.__curr_line = self.open_file.readline()
+        self.__curr_line = self.__curr_line.replace('\n', '')
 
     def recursive_call(self, data_tree: dict):
-        self.open_file.seek(self.rollback_addr)
+        self.open_file.seek(self.__rollback_addr)
         self.recursive_fun(data_tree)
 
     def check_start_node(self):
-        if '+-o' not in self.line:
+        if '+-o' not in self.__curr_line:
             logger.error('This is not normal. Recursive function called on random line.')
-            exit(1)
-
-    def not_empty_node_check(self):
-        if not self.rollback_addr:
-            logger.error("+-o in two consecutive lines, not supposed to be possible")
-            exit(1)
+            raise Exception("File has an invalid structure, '+-o' tag was not found in first line")
 
     def check_key_uniqueness(self, dictio: dict, key: str):
         if dictio.get(key):
@@ -45,12 +40,12 @@ def fetch_node_data(self, data_tree: dict) -> bool:
         node_data = []  # array of lines, to be transformed in json
         res = True
 
-        while '+-o' not in self.line:
-            if not self.line:  # end of file
+        while '+-o' not in self.__curr_line:
+            if not self.__curr_line:  # end of file
                 res = False
                 break
 
-            node_data.append(self.line)
+            node_data.append(self.__curr_line)
             self.get_line()
 
         data_dict = self.node_data_to_json(node_data)
@@ -79,11 +74,11 @@ def dict_update(self, main_dict: dict, data_dict: dict):
             main_dict[key] = data_dict[key]
 
     def parse_title(self) -> tuple:
-        if "+-o" not in self.line:
+        if "+-o" not in self.__curr_line:
             logger.warning("'non-title' line given to title parser, should not happen")
             return "", ""
 
-        whole_title = self.line.split("+-o", 1)[1].strip()
+        whole_title = self.__curr_line.split("+-o", 1)[1].strip()
         if " format, to investigate")
@@ -144,8 +139,8 @@ def node_data_to_json(self, data_array: list[str]) -> dict:
         return res
 
     def iterate_children(self, depth: int, data_tree: dict):
-        while self.line and (self.line[depth] == '|' or self.line[depth: depth + 3] == '+-o'):
-            if self.line[depth: depth + 3] == '+-o':
+        while self.__curr_line and (self.__curr_line[depth] == '|' or self.__curr_line[depth: depth + 3] == '+-o'):
+            if self.__curr_line[depth: depth + 3] == '+-o':
                 name = self.parse_title()[0]
                 new_child = self.setup_new_child(data_tree, name)
                 self.recursive_call(new_child)
@@ -185,11 +180,11 @@ def recursive_fun(self, data_tree: dict):
 
         self.dict_update(data_tree, additional_data)
 
-        depth = self.line.index('o')  # to identify the other nodes that have the same parent
+        depth = self.__curr_line.index('o')  # to identify the other nodes that have the same parent
         self.get_line()
 
         # check if
it's a leaf
-        if self.line[depth] != '|':
+        if self.__curr_line[depth] != '|':
             is_leaf = True
 
         # Fetch the data of the node
@@ -198,11 +193,8 @@ def recursive_fun(self, data_tree: dict):
 
         # stop if we're a leaf
         if is_leaf:
-            self.open_file.seek(self.rollback_addr)
+            self.open_file.seek(self.__rollback_addr)
             return
 
-        # sanity check
-        self.not_empty_node_check()
-
         # Iterates over each child to call the current function
         self.iterate_children(depth, data_tree)
 
diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/string_parser.py
similarity index 99%
rename from src/sysdiagnose/utils/ioreg_parsers/string_parser.py
rename to src/sysdiagnose/utils/string_parser.py
index 4ec81e5..1c484bd 100644
--- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py
+++ b/src/sysdiagnose/utils/string_parser.py
@@ -316,7 +316,7 @@ def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | l
 
     else:
         logger.error('Error : struct type not found')
-        exit(1)
+        raise ValueError("Structure passed has to be a dict, a list or a string. Type: " + str(type(final_struct)))
 
     # return is necessary, strings are not passed by reference in python
     return final_struct
diff --git a/tests/test_string_parser.py b/tests/test_string_parser.py
index 6805a73..3f5da07 100644
--- a/tests/test_string_parser.py
+++ b/tests/test_string_parser.py
@@ -1,6 +1,6 @@
 from tests import SysdiagnoseTestCase
 import unittest
-import sysdiagnose.utils.ioreg_parsers.string_parser as sp
+import sysdiagnose.utils.string_parser as sp
 
 
 class TestStringParser(SysdiagnoseTestCase):
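
Note on the error-handling change in the last patch: since the exit(1) calls were replaced by raised exceptions, callers of the ioreg parsers can now decide how to recover from malformed input instead of having the whole run aborted. A minimal usage sketch of the calling side, under that assumption: the parse_ioreg_file() wrapper below is hypothetical and not part of the patches, while IORegStructParser.parse() and the shared logger come from the code above.

    from sysdiagnose.utils.base import logger
    from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser


    def parse_ioreg_file(path: str) -> dict:
        # Hypothetical helper, shown for illustration only.
        parser = IORegStructParser()
        try:
            # parse() walks the '+-o' node tree and returns it as a nested dict.
            return parser.parse(path)
        except Exception:
            # check_start_node() raises a plain Exception on structurally invalid
            # input, and string_parser.resolve_tag() raises ValueError; log and
            # return an empty tree so the remaining files can still be processed.
            logger.exception(f"Could not parse ioreg file {path}")
            return {}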