From 81e2f355326ff98ed4180b1fa540f845581984fe Mon Sep 17 00:00:00 2001
From: Aweinhof
Date: Fri, 12 Sep 2025 12:08:36 +0200
Subject: [PATCH 01/17] Added recursive, basic version of IOService parser

---
 src/sysdiagnose/parsers/ioservice.py | 147 +++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 src/sysdiagnose/parsers/ioservice.py

diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py
new file mode 100644
index 0000000..1c34f17
--- /dev/null
+++ b/src/sysdiagnose/parsers/ioservice.py
@@ -0,0 +1,147 @@
+#! /usr/bin/env python3
+
+import os
+import string
+from tokenize import String
+from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger, Event
+from datetime import datetime
+
+
+class DemoParser(BaseParserInterface):
+    description = "Demo parsers"
+    format = "json"  # by default json, use jsonl for event-based data
+    rollback_addr = None
+    line = None
+    open_file = None
+
+    def __init__(self, config: SysdiagnoseConfig, case_id: str):
+        super().__init__(__file__, config, case_id)
+
+    def get_log_files(self) -> list:
+        log_file = "ioreg/IOServiceTestData.txt"
+        return [os.path.join(self.case_data_subfolder, log_file)]
+
+    def execute(self) -> list | dict:
+        '''
+        this is the function that will be called
+        '''
+        result = []
+        log_files = self.get_log_files()
+        for log_file in log_files:
+            entry = {}
+            try:
+                timestamp = datetime.strptime('1980-01-01 12:34:56.001 +00:00', '%Y-%m-%d %H:%M:%S.%f %z')  # moment of interest
+                event = Event(
+                    datetime=timestamp,
+                    message=f"Demo event from {log_file}",  # String with an informative message of the event
+                    module=self.module_name,
+                    timestamp_desc='Demo timestamp',  # String explaining what type of timestamp it is for example file created
+                )
+
+                self.parse_file(log_file)
+
+                result.append(event.to_dict())
+                logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file})
+                logger.debug(f"Entry details {str(entry)}", extra={'entry': str(entry)})
+                if not entry:
+                    logger.warning("Empty entry.")
+
+            except Exception:
+                logger.exception("Got an exception !")
+
+        return result
+
+    def parse_file(self, file: string):
+        """ IOService file notes
+
+        # Regex for +-o starting at start of file -> 1213 results
+        (\s|\|)*\+-o
+
+        # Regex for ALL +-o - 1213 results
+        \+-o
+
+        So we know that the data doesn't contain the node identifier ('+-o')
+
+        """
+        print('===============================')
+        with open(file, 'r') as f:
+            self.open_file = f
+            self.recursive_fun()
+            self.open_file = None
+        print('===============================')
+
+    def get_line(self):
+        self.rollback_addr = self.open_file.tell()
+        self.line = self.open_file.readline().replace('\n', '')
+
+    def recursive_call(self):
+        self.open_file.seek(self.rollback_addr)
+        self.recursive_fun()
+
+    def check_start_node(self):
+        if '+-o' not in self.line:
+            logger.error('This is not normal. Recursive function called on random line.')
+            exit(1)
+
+    def not_empty_node_check(self):
+        if not self.rollback_addr:
+            logger.error("+-o in two consecutive lines, not supposed to be possible")
+            exit(1)
+
+    def iterate_children(self, depth):
+        while self.line and (self.line[depth] == '|' or self.line[depth: depth+3] == '+-o'):
+            if self.line[depth: depth+3] == '+-o':
+                self.recursive_call()
+
+            else:
+                self.get_line()
+
+    def fetch_node_data(self):
+        while '+-o' not in self.line:
+            if not self.line:
+                return False  # end of file
+
+            node_data = []  # array of lines, to be transformed in json
+            node_data.append(self.line)
+            self.get_line()
+
+        return True
+
+    def recursive_fun(self):
+        is_leaf = False
+        self.get_line()
+
+        # check if we're at the start of a node
+        self.check_start_node()
+
+        node_name = self.line.split("+-o")[1].strip()
+        print("Node : ", node_name)
+        depth = self.line.index('o')  # to identify the other nodes that have the same parent
+        self.get_line()
+
+        # check if its a leaf
+        if self.line[depth] != '|':
+            is_leaf = True
+
+        # Fetch the data of the node
+        if not self.fetch_node_data():
+            return  # EOF
+
+        # stop if we're a leaf
+        if is_leaf:
+            self.open_file.seek(self.rollback_addr)
+            return
+
+        # sanity check
+        self.not_empty_node_check()
+
+        # going back one line to retrieve the node title line
+        self.recursive_call()
+        self.get_line()
+
+        # Iterates over each child to call the current function
+        self.iterate_children(depth)
+
+
+

From 2a7131fc620fb5381771bc3ee2d467f2759d2f09 Mon Sep 17 00:00:00 2001
From: Aweinhof
Date: Fri, 12 Sep 2025 16:53:43 +0200
Subject: [PATCH 02/17] IOService parser now recreates the tree in a json format

---
 src/sysdiagnose/parsers/ioservice.py | 100 ++++++++++++++++-----------
 1 file changed, 58 insertions(+), 42 deletions(-)

diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py
index 1c34f17..2030051 100644
--- a/src/sysdiagnose/parsers/ioservice.py
+++ b/src/sysdiagnose/parsers/ioservice.py
@@ -1,18 +1,22 @@
 #! 
/usr/bin/env python3 +import array +from ctypes import Array import os import string from tokenize import String -from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger, Event +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger from datetime import datetime +import re -class DemoParser(BaseParserInterface): - description = "Demo parsers" - format = "json" # by default json, use jsonl for event-based data +class IOServiceParser(BaseParserInterface): + description = "IOService.txt file parser" + format = "json" rollback_addr = None line = None open_file = None + def __init__(self, config: SysdiagnoseConfig, case_id: str): super().__init__(__file__, config, case_id) @@ -22,36 +26,20 @@ def get_log_files(self) -> list: return [os.path.join(self.case_data_subfolder, log_file)] def execute(self) -> list | dict: - ''' - this is the function that will be called - ''' - result = [] log_files = self.get_log_files() + data_tree = {} + for log_file in log_files: - entry = {} try: - timestamp = datetime.strptime('1980-01-01 12:34:56.001 +00:00', '%Y-%m-%d %H:%M:%S.%f %z') # moment of interest - event = Event( - datetime=timestamp, - message=f"Demo event from {log_file}", # String with an informative message of the event - module=self.module_name, - timestamp_desc='Demo timestamp', # String explaining what type of timestamp it is for example file created - ) - - self.parse_file(log_file) - - result.append(event.to_dict()) logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) - logger.debug(f"Entry details {str(entry)}", extra={'entry': str(entry)}) - if not entry: - logger.warning("Empty entry.") + self.parse_file(log_file, data_tree) except Exception: logger.exception("Got an exception !") - return result + return data_tree - def parse_file(self, file: string): + def parse_file(self, file: str, data_tree: dict): """ IOService file notes # Regex for +-o starting at start of file -> 1213 results @@ -66,7 +54,7 @@ def parse_file(self, file: string): print('===============================') with open(file, 'r') as f: self.open_file = f - self.recursive_fun() + self.recursive_fun(data_tree) self.open_file = None print('===============================') @@ -74,9 +62,9 @@ def get_line(self): self.rollback_addr = self.open_file.tell() self.line = self.open_file.readline().replace('\n', '') - def recursive_call(self): + def recursive_call(self, data_tree: dict): self.open_file.seek(self.rollback_addr) - self.recursive_fun() + self.recursive_fun(data_tree) def check_start_node(self): if '+-o' not in self.line: @@ -88,26 +76,56 @@ def not_empty_node_check(self): logger.error("+-o in two consecutive lines, not supposed to be possible") exit(1) - def iterate_children(self, depth): + def iterate_children(self, depth: int, data_tree_list: list[dict]): while self.line and (self.line[depth] == '|' or self.line[depth: depth+3] == '+-o'): if self.line[depth: depth+3] == '+-o': - self.recursive_call() + data_tree_list.append({}) + self.recursive_call(data_tree_list[-1]) else: self.get_line() - def fetch_node_data(self): + def check_key_uniqueness(self, dictio, key): + if dictio.get(key): + logger.warning('Key is already in dictionary, data may be lost') + + def fetch_node_data(self, data_tree): + node_data = [] # array of lines, to be transformed in json + res = True + while '+-o' not in self.line: - if not self.line: - return False # end of file + if not self.line: # end of file + res = False + break - node_data = [] # array of 
lines, to be transformed in json node_data.append(self.line) self.get_line() - return True + data_tree['Data'] = self.node_data_to_json(node_data) + return res + + def node_data_to_json(self, data_array: list[str]) -> dict: + res = {} + for data in data_array: + # remove spaces and pipes at start + clean_line = re.sub('^(\s|\|)*', '', data) + + if '=' not in clean_line: + continue + + # split at the first equal only + key, value = clean_line.split('=', 1) + + # remove first and last " (in case the key has more quotes inside) + key = key.replace('"', '', 1) + key = key[::-1].replace('"', '', 1)[::-1] + + self.check_key_uniqueness(res, key) + res[key.strip()] = value.strip() + + return res - def recursive_fun(self): + def recursive_fun(self, data_tree: dict): is_leaf = False self.get_line() @@ -116,6 +134,8 @@ def recursive_fun(self): node_name = self.line.split("+-o")[1].strip() print("Node : ", node_name) + data_tree['Name'] = node_name + data_tree['Children'] = [] depth = self.line.index('o') # to identify the other nodes that have the same parent self.get_line() @@ -124,7 +144,7 @@ def recursive_fun(self): is_leaf = True # Fetch the data of the node - if not self.fetch_node_data(): + if not self.fetch_node_data(data_tree): return # EOF # stop if we're a leaf @@ -135,12 +155,8 @@ def recursive_fun(self): # sanity check self.not_empty_node_check() - # going back one line to retrieve the node title line - self.recursive_call() - self.get_line() - # Iterates over each child to call the current function - self.iterate_children(depth) + self.iterate_children(depth, data_tree['Children']) From 333cbdaa391fa7716ad55703d9c612bfef270005 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Fri, 12 Sep 2025 18:26:37 +0200 Subject: [PATCH 03/17] IOService parser now handles anomalies --- src/sysdiagnose/parsers/ioservice.py | 34 +++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py index 2030051..3319ed4 100644 --- a/src/sysdiagnose/parsers/ioservice.py +++ b/src/sysdiagnose/parsers/ioservice.py @@ -22,7 +22,7 @@ def __init__(self, config: SysdiagnoseConfig, case_id: str): super().__init__(__file__, config, case_id) def get_log_files(self) -> list: - log_file = "ioreg/IOServiceTestData.txt" + log_file = "ioreg/IOServiceTestData2.txt" return [os.path.join(self.case_data_subfolder, log_file)] def execute(self) -> list | dict: @@ -35,7 +35,7 @@ def execute(self) -> list | dict: self.parse_file(log_file, data_tree) except Exception: - logger.exception("Got an exception !") + logger.exception("IOService parsing crashed") return data_tree @@ -51,12 +51,10 @@ def parse_file(self, file: str, data_tree: dict): So we know that the data doesn't contain the node identifier ('+-o') """ - print('===============================') with open(file, 'r') as f: self.open_file = f self.recursive_fun(data_tree) self.open_file = None - print('===============================') def get_line(self): self.rollback_addr = self.open_file.tell() @@ -104,9 +102,33 @@ def fetch_node_data(self, data_tree): data_tree['Data'] = self.node_data_to_json(node_data) return res + def handle_anomalies(self, dictio, data, key): + """ + some values overflow on the few next lines + this condition assumes there is no '=' in the exceeding data + (which was the case up to what I saw) + + p.s. : if you wonder why cond4 is necessary, it is only for + the last leaf, which has no '|' symbols. 
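+        (the last leaf's data lines start with plain spaces instead of '|',
+        so cond1 alone cannot tell them apart from ordinary overflow lines)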
without cond4, + these lines would be seen as anomalies + """ + cond1 = not re.search('^\s*\|+', data) + cond2 = len(data.strip()) > 0 + cond3 = data.strip() not in ('{', '}') + cond4 = '=' not in data + + if cond1 and cond2 and cond3 and cond4: + dictio[key] += data.strip() + return True + return False + def node_data_to_json(self, data_array: list[str]) -> dict: res = {} + key = None + for data in data_array: + self.handle_anomalies(res, data, key) + # remove spaces and pipes at start clean_line = re.sub('^(\s|\|)*', '', data) @@ -119,9 +141,10 @@ def node_data_to_json(self, data_array: list[str]) -> dict: # remove first and last " (in case the key has more quotes inside) key = key.replace('"', '', 1) key = key[::-1].replace('"', '', 1)[::-1] + key = key.strip() self.check_key_uniqueness(res, key) - res[key.strip()] = value.strip() + res[key] = value.strip() return res @@ -133,7 +156,6 @@ def recursive_fun(self, data_tree: dict): self.check_start_node() node_name = self.line.split("+-o")[1].strip() - print("Node : ", node_name) data_tree['Name'] = node_name data_tree['Children'] = [] depth = self.line.index('o') # to identify the other nodes that have the same parent From 7484620bac12b612b720194da145a6554540429f Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Mon, 15 Sep 2025 11:28:20 +0200 Subject: [PATCH 04/17] Fixed an anomaly where a non-ascii byte stands in the IOService.txt file --- src/sysdiagnose/parsers/ioservice.py | 59 ++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py index 3319ed4..9d68075 100644 --- a/src/sysdiagnose/parsers/ioservice.py +++ b/src/sysdiagnose/parsers/ioservice.py @@ -2,8 +2,8 @@ import array from ctypes import Array +from io import BufferedReader import os -import string from tokenize import String from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger from datetime import datetime @@ -22,7 +22,7 @@ def __init__(self, config: SysdiagnoseConfig, case_id: str): super().__init__(__file__, config, case_id) def get_log_files(self) -> list: - log_file = "ioreg/IOServiceTestData2.txt" + log_file = "ioreg/IOService.txt" return [os.path.join(self.case_data_subfolder, log_file)] def execute(self) -> list | dict: @@ -51,14 +51,41 @@ def parse_file(self, file: str, data_tree: dict): So we know that the data doesn't contain the node identifier ('+-o') """ - with open(file, 'r') as f: + with open(file, 'rb') as f: self.open_file = f self.recursive_fun(data_tree) self.open_file = None def get_line(self): self.rollback_addr = self.open_file.tell() - self.line = self.open_file.readline().replace('\n', '') + self.line = self.safe_readline(self.open_file) + self.line = self.line.replace('\n', '') + + def safe_readline(self, open_file: BufferedReader, replacement_char: str = '?'): + """ + Simulates readline() in binary mode, replacing non-ASCII bytes. 
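+        e.g. the bytes b'foo\xbfbar\n' come back as the string 'foo?bar'
+        (with the default replacement_char)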
+ + This fixes an anomaly where a non-ascii (non-utf-8-) byte is present in the IOService.txt file + (line 10797 in the testdata) + """ + buffer = "" + + while True: + byte = open_file.read(1) + + if not byte: # EOF + return buffer + + if byte == b'\n': + return buffer + else: + # Check if ASCII (0–127), else replace + if byte[0] < 128: + buffer += chr(byte[0]) + else: + buffer += replacement_char[0] + #buffer.append(byte[0] if ord(byte[0]) < 128 else replacement_byte[0]) + def recursive_call(self, data_tree: dict): self.open_file.seek(self.rollback_addr) @@ -74,20 +101,11 @@ def not_empty_node_check(self): logger.error("+-o in two consecutive lines, not supposed to be possible") exit(1) - def iterate_children(self, depth: int, data_tree_list: list[dict]): - while self.line and (self.line[depth] == '|' or self.line[depth: depth+3] == '+-o'): - if self.line[depth: depth+3] == '+-o': - data_tree_list.append({}) - self.recursive_call(data_tree_list[-1]) - - else: - self.get_line() - - def check_key_uniqueness(self, dictio, key): + def check_key_uniqueness(self, dictio: dict, key: str): if dictio.get(key): logger.warning('Key is already in dictionary, data may be lost') - def fetch_node_data(self, data_tree): + def fetch_node_data(self, data_tree: dict) -> bool: node_data = [] # array of lines, to be transformed in json res = True @@ -102,7 +120,7 @@ def fetch_node_data(self, data_tree): data_tree['Data'] = self.node_data_to_json(node_data) return res - def handle_anomalies(self, dictio, data, key): + def handle_anomalies(self, dictio: dict, data: str, key: str) -> bool: """ some values overflow on the few next lines this condition assumes there is no '=' in the exceeding data @@ -148,6 +166,15 @@ def node_data_to_json(self, data_array: list[str]) -> dict: return res + def iterate_children(self, depth: int, data_tree_list: list[dict]): + while self.line and (self.line[depth] == '|' or self.line[depth: depth+3] == '+-o'): + if self.line[depth: depth+3] == '+-o': + data_tree_list.append({}) + self.recursive_call(data_tree_list[-1]) + + else: + self.get_line() + def recursive_fun(self, data_tree: dict): is_leaf = False self.get_line() From 5e03cecf40bbd5483d240e3bf0281f1c01528b71 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Mon, 15 Sep 2025 12:31:50 +0200 Subject: [PATCH 05/17] Added unit testing for ioservice parser --- .vscode/launch.json | 8 + tests/test_parsers_ioservice.py | 363 ++++++++++++++++++++++++++++++++ 2 files changed, 371 insertions(+) create mode 100644 tests/test_parsers_ioservice.py diff --git a/.vscode/launch.json b/.vscode/launch.json index a514352..a2be49f 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -316,6 +316,14 @@ "module": "sysdiagnose.__main__", "args": "-c public parse swcutil", "cwd": "${workspaceFolder}/" + }, + { + "name": "Python Debugger: parse ioservice", + "type": "debugpy", + "request": "launch", + "module": "sysdiagnose.__main__", + "args": "-c public -l DEBUG parse ioservice", + "cwd": "${workspaceFolder}/" } ] } \ No newline at end of file diff --git a/tests/test_parsers_ioservice.py b/tests/test_parsers_ioservice.py new file mode 100644 index 0000000..2243d19 --- /dev/null +++ b/tests/test_parsers_ioservice.py @@ -0,0 +1,363 @@ +from sysdiagnose.parsers.ioservice import IOServiceParser +from tests import SysdiagnoseTestCase +import unittest +import io + + +class TestParsersIOService(SysdiagnoseTestCase): + + def test_basic_structure(self): + for case_id, _ in self.sd.cases().items(): + p = IOServiceParser(self.sd.config, case_id=case_id) + 
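+            # hypothetical minimal tree mimicking ioreg's text format: '+-o'
+            # marks a node, '|' extends the parent's column, key = value pairs sit in braces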
+ # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.BytesIO(b"""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value l4" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") + + expected = { + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value l4\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" + }, + { + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" + }, + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": "\"value 52\"" + }, + "Name": "Leaf 4" + } + ], + "Data": { + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" + }, + "Name": "Node 2" + } + ], + "Data": { + "data 1": "\"value 1\"", + "data 2": "\"value 2\"" + }, + "Name": "Root node" + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_value_overflow_anomaly(self): + for case_id, _ in self.sd.cases().items(): + p = IOServiceParser(self.sd.config, case_id=case_id) + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.BytesIO(b"""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") + + expected = { + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value aaaabbbbccccdddd\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" + }, + { + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" + }, + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": "\"value 52\"" + }, + "Name": "Leaf 4" + } + ], + "Data": { + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" + }, + "Name": "Node 2" + } + ], + "Data": { + "data 1": "\"value 1\"", + "data 2": "\"value 2\"" + }, + "Name": "Root node" + 
} + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + for case_id, _ in self.sd.cases().items(): + p = IOServiceParser(self.sd.config, case_id=case_id) + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.BytesIO(b"""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") + + expected = { + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value -->?<--\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" + }, + { + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" + }, + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": "\"value 52\"" + }, + "Name": "Leaf 4" + } + ], + "Data": { + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" + }, + "Name": "Node 2" + } + ], + "Data": { + "data 1": "\"value 1\"", + "data 2": "\"value 2\"" + }, + "Name": "Root node" + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() From 25145b8a8284bf5eb5a5ef112a5da9b63260715e Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Mon, 15 Sep 2025 14:22:05 +0200 Subject: [PATCH 06/17] Fixed codestyling of ioservice and its unit testing --- src/sysdiagnose/parsers/ioservice.py | 51 +++--- tests/test_parsers_ioservice.py | 252 +++++++++++++-------------- 2 files changed, 146 insertions(+), 157 deletions(-) diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py index 9d68075..cad812d 100644 --- a/src/sysdiagnose/parsers/ioservice.py +++ b/src/sysdiagnose/parsers/ioservice.py @@ -1,13 +1,9 @@ #! 
/usr/bin/env python3 -import array -from ctypes import Array from io import BufferedReader import os -from tokenize import String -from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger -from datetime import datetime import re +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger class IOServiceParser(BaseParserInterface): @@ -16,7 +12,6 @@ class IOServiceParser(BaseParserInterface): rollback_addr = None line = None open_file = None - def __init__(self, config: SysdiagnoseConfig, case_id: str): super().__init__(__file__, config, case_id) @@ -33,12 +28,12 @@ def execute(self) -> list | dict: try: logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) self.parse_file(log_file, data_tree) - + except Exception: logger.exception("IOService parsing crashed") - + return data_tree - + def parse_file(self, file: str, data_tree: dict): """ IOService file notes @@ -50,7 +45,7 @@ def parse_file(self, file: str, data_tree: dict): So we know that the data doesn't contain the node identifier ('+-o') - """ + """ # noqa: W605 with open(file, 'rb') as f: self.open_file = f self.recursive_fun(data_tree) @@ -73,7 +68,7 @@ def safe_readline(self, open_file: BufferedReader, replacement_char: str = '?'): while True: byte = open_file.read(1) - if not byte: # EOF + if not byte: # EOF return buffer if byte == b'\n': @@ -84,8 +79,6 @@ def safe_readline(self, open_file: BufferedReader, replacement_char: str = '?'): buffer += chr(byte[0]) else: buffer += replacement_char[0] - #buffer.append(byte[0] if ord(byte[0]) < 128 else replacement_byte[0]) - def recursive_call(self, data_tree: dict): self.open_file.seek(self.rollback_addr) @@ -106,31 +99,31 @@ def check_key_uniqueness(self, dictio: dict, key: str): logger.warning('Key is already in dictionary, data may be lost') def fetch_node_data(self, data_tree: dict) -> bool: - node_data = [] # array of lines, to be transformed in json + node_data = [] # array of lines, to be transformed in json res = True while '+-o' not in self.line: if not self.line: # end of file res = False break - + node_data.append(self.line) self.get_line() data_tree['Data'] = self.node_data_to_json(node_data) return res - + def handle_anomalies(self, dictio: dict, data: str, key: str) -> bool: """ some values overflow on the few next lines this condition assumes there is no '=' in the exceeding data (which was the case up to what I saw) - p.s. : if you wonder why cond4 is necessary, it is only for + p.s. : if you wonder why cond4 is necessary, it is only for the last leaf, which has no '|' symbols. 
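         (the last leaf's data lines start with plain spaces instead of '|',
         so cond1 alone cannot tell them apart from ordinary overflow lines)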
without cond4, these lines would be seen as anomalies """ - cond1 = not re.search('^\s*\|+', data) + cond1 = not re.search(r'^\s*\|+', data) cond2 = len(data.strip()) > 0 cond3 = data.strip() not in ('{', '}') cond4 = '=' not in data @@ -139,7 +132,7 @@ def handle_anomalies(self, dictio: dict, data: str, key: str) -> bool: dictio[key] += data.strip() return True return False - + def node_data_to_json(self, data_array: list[str]) -> dict: res = {} key = None @@ -148,7 +141,7 @@ def node_data_to_json(self, data_array: list[str]) -> dict: self.handle_anomalies(res, data, key) # remove spaces and pipes at start - clean_line = re.sub('^(\s|\|)*', '', data) + clean_line = re.sub(r'^(\s|\|)*', '', data) if '=' not in clean_line: continue @@ -163,18 +156,18 @@ def node_data_to_json(self, data_array: list[str]) -> dict: self.check_key_uniqueness(res, key) res[key] = value.strip() - + return res - + def iterate_children(self, depth: int, data_tree_list: list[dict]): - while self.line and (self.line[depth] == '|' or self.line[depth: depth+3] == '+-o'): - if self.line[depth: depth+3] == '+-o': + while self.line and (self.line[depth] == '|' or self.line[depth: depth + 3] == '+-o'): + if self.line[depth: depth + 3] == '+-o': data_tree_list.append({}) self.recursive_call(data_tree_list[-1]) else: self.get_line() - + def recursive_fun(self, data_tree: dict): is_leaf = False self.get_line() @@ -185,7 +178,7 @@ def recursive_fun(self, data_tree: dict): node_name = self.line.split("+-o")[1].strip() data_tree['Name'] = node_name data_tree['Children'] = [] - depth = self.line.index('o') # to identify the other nodes that have the same parent + depth = self.line.index('o') # to identify the other nodes that have the same parent self.get_line() # check if its a leaf @@ -200,13 +193,9 @@ def recursive_fun(self, data_tree: dict): if is_leaf: self.open_file.seek(self.rollback_addr) return - + # sanity check self.not_empty_node_check() # Iterates over each child to call the current function self.iterate_children(depth, data_tree['Children']) - - - - diff --git a/tests/test_parsers_ioservice.py b/tests/test_parsers_ioservice.py index 2243d19..b402aca 100644 --- a/tests/test_parsers_ioservice.py +++ b/tests/test_parsers_ioservice.py @@ -9,7 +9,7 @@ class TestParsersIOService(SysdiagnoseTestCase): def test_basic_structure(self): for case_id, _ in self.sd.cases().items(): p = IOServiceParser(self.sd.config, case_id=case_id) - + # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') start_file = io.BytesIO(b"""+-o Root node @@ -54,59 +54,59 @@ def test_basic_structure(self): "data 52" = "value 52" } -""") - +""") # noqa: W291, W293 + expected = { "Children": [ { - "Children": [ - { "Children": [ { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value l4\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" }, { - "Children": [], - "Data": { - "data l3": "\"value l3\"", - "data l4": "\"value l4\"" + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" }, - "Name": "Leaf 2" + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": 
"\"value 52\"" + }, + "Name": "Leaf 4" } ], "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" - }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" - }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" - }, - "Name": "Node 2" + "Name": "Node 2" } ], "Data": { @@ -125,7 +125,7 @@ def test_basic_structure(self): def test_value_overflow_anomaly(self): for case_id, _ in self.sd.cases().items(): p = IOServiceParser(self.sd.config, case_id=case_id) - + # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') start_file = io.BytesIO(b"""+-o Root node @@ -174,59 +174,59 @@ def test_value_overflow_anomaly(self): "data 52" = "value 52" } -""") - +""") # noqa: W291, W293 + expected = { "Children": [ { - "Children": [ - { "Children": [ { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value aaaabbbbccccdddd\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" }, { - "Children": [], - "Data": { - "data l3": "\"value l3\"", - "data l4": "\"value aaaabbbbccccdddd\"" + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" }, - "Name": "Leaf 2" + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": "\"value 52\"" + }, + "Name": "Leaf 4" } ], "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" - }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" - }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" - }, - "Name": "Node 2" + "Name": "Node 2" } ], "Data": { @@ -245,7 +245,7 @@ def test_value_overflow_anomaly(self): def test_non_ascii_byte_anomaly(self): for case_id, _ in self.sd.cases().items(): p = IOServiceParser(self.sd.config, case_id=case_id) - + # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') start_file = io.BytesIO(b"""+-o Root node @@ -290,59 +290,59 @@ def test_non_ascii_byte_anomaly(self): "data 52" = "value 52" } -""") - +""") # noqa: W291, W293 + expected = { "Children": [ { - "Children": [ - { "Children": [ { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" + "Children": [ + { + "Children": [], + "Data": { + "data l1": "\"value l1\"", + "data l2": "\"value l2\"" + }, + "Name": "Leaf 1" + }, + { + "Children": [], + "Data": { + "data l3": "\"value l3\"", + "data l4": "\"value -->?<--\"" + }, + "Name": "Leaf 2" + } + ], + "Data": { + "data 31": "\"value 31\"", + "data 32": "\"value 32\"" + }, + "Name": "Node 3" }, { - "Children": [], - "Data": { 
- "data l3": "\"value l3\"", - "data l4": "\"value -->?<--\"" + "Children": [], + "Data": { + "data l5": "\"value L5\"", + "data l6": "\"value l6\"" + }, + "Name": "Leaf 3" }, - "Name": "Leaf 2" + { + "Children": [], + "Data": { + "data 51": "\"value 51\"", + "data 52": "\"value 52\"" + }, + "Name": "Leaf 4" } ], "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" - }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" - }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" + "#address-cells": "<02000000>", + "AAPL,phandle": "<01000000>" }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" - }, - "Name": "Node 2" + "Name": "Node 2" } ], "Data": { From ebb74a67d492e9e1b4fe77f98deec0519e49e06e Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Tue, 16 Sep 2025 17:42:37 +0200 Subject: [PATCH 07/17] Small refactor of the resulting data structure in the ioservice parser --- src/sysdiagnose/parsers/ioservice.py | 179 ++---------------- .../utils/ioreg_parsers/string_parser.py | 9 + .../utils/ioreg_parsers/structure_parser.py | 172 +++++++++++++++++ 3 files changed, 192 insertions(+), 168 deletions(-) create mode 100644 src/sysdiagnose/utils/ioreg_parsers/string_parser.py create mode 100644 src/sysdiagnose/utils/ioreg_parsers/structure_parser.py diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py index cad812d..b0c4ec3 100644 --- a/src/sysdiagnose/parsers/ioservice.py +++ b/src/sysdiagnose/parsers/ioservice.py @@ -1,17 +1,13 @@ #! /usr/bin/env python3 -from io import BufferedReader import os -import re from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser class IOServiceParser(BaseParserInterface): description = "IOService.txt file parser" format = "json" - rollback_addr = None - line = None - open_file = None def __init__(self, config: SysdiagnoseConfig, case_id: str): super().__init__(__file__, config, case_id) @@ -21,20 +17,6 @@ def get_log_files(self) -> list: return [os.path.join(self.case_data_subfolder, log_file)] def execute(self) -> list | dict: - log_files = self.get_log_files() - data_tree = {} - - for log_file in log_files: - try: - logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) - self.parse_file(log_file, data_tree) - - except Exception: - logger.exception("IOService parsing crashed") - - return data_tree - - def parse_file(self, file: str, data_tree: dict): """ IOService file notes # Regex for +-o starting at start of file -> 1213 results @@ -46,156 +28,17 @@ def parse_file(self, file: str, data_tree: dict): So we know that the data doesn't contain the node identifier ('+-o') """ # noqa: W605 - with open(file, 'rb') as f: - self.open_file = f - self.recursive_fun(data_tree) - self.open_file = None - - def get_line(self): - self.rollback_addr = self.open_file.tell() - self.line = self.safe_readline(self.open_file) - self.line = self.line.replace('\n', '') - - def safe_readline(self, open_file: BufferedReader, replacement_char: str = '?'): - """ - Simulates readline() in binary mode, replacing non-ASCII bytes. 
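-        e.g. the bytes b'foo\xbfbar\n' come back as the string 'foo?bar'
-        (with the default replacement_char)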
- - This fixes an anomaly where a non-ascii (non-utf-8-) byte is present in the IOService.txt file - (line 10797 in the testdata) - """ - buffer = "" - - while True: - byte = open_file.read(1) - - if not byte: # EOF - return buffer - - if byte == b'\n': - return buffer - else: - # Check if ASCII (0–127), else replace - if byte[0] < 128: - buffer += chr(byte[0]) - else: - buffer += replacement_char[0] - - def recursive_call(self, data_tree: dict): - self.open_file.seek(self.rollback_addr) - self.recursive_fun(data_tree) - - def check_start_node(self): - if '+-o' not in self.line: - logger.error('This is not normal. Recursive function called on random line.') - exit(1) - - def not_empty_node_check(self): - if not self.rollback_addr: - logger.error("+-o in two consecutive lines, not supposed to be possible") - exit(1) - - def check_key_uniqueness(self, dictio: dict, key: str): - if dictio.get(key): - logger.warning('Key is already in dictionary, data may be lost') - - def fetch_node_data(self, data_tree: dict) -> bool: - node_data = [] # array of lines, to be transformed in json - res = True - - while '+-o' not in self.line: - if not self.line: # end of file - res = False - break - - node_data.append(self.line) - self.get_line() - - data_tree['Data'] = self.node_data_to_json(node_data) - return res - def handle_anomalies(self, dictio: dict, data: str, key: str) -> bool: - """ - some values overflow on the few next lines - this condition assumes there is no '=' in the exceeding data - (which was the case up to what I saw) - - p.s. : if you wonder why cond4 is necessary, it is only for - the last leaf, which has no '|' symbols. without cond4, - these lines would be seen as anomalies - """ - cond1 = not re.search(r'^\s*\|+', data) - cond2 = len(data.strip()) > 0 - cond3 = data.strip() not in ('{', '}') - cond4 = '=' not in data - - if cond1 and cond2 and cond3 and cond4: - dictio[key] += data.strip() - return True - return False - - def node_data_to_json(self, data_array: list[str]) -> dict: - res = {} - key = None - - for data in data_array: - self.handle_anomalies(res, data, key) - - # remove spaces and pipes at start - clean_line = re.sub(r'^(\s|\|)*', '', data) - - if '=' not in clean_line: - continue - - # split at the first equal only - key, value = clean_line.split('=', 1) - - # remove first and last " (in case the key has more quotes inside) - key = key.replace('"', '', 1) - key = key[::-1].replace('"', '', 1)[::-1] - key = key.strip() - - self.check_key_uniqueness(res, key) - res[key] = value.strip() - - return res - - def iterate_children(self, depth: int, data_tree_list: list[dict]): - while self.line and (self.line[depth] == '|' or self.line[depth: depth + 3] == '+-o'): - if self.line[depth: depth + 3] == '+-o': - data_tree_list.append({}) - self.recursive_call(data_tree_list[-1]) - - else: - self.get_line() - - def recursive_fun(self, data_tree: dict): - is_leaf = False - self.get_line() - - # check if we're at the start of a node - self.check_start_node() - - node_name = self.line.split("+-o")[1].strip() - data_tree['Name'] = node_name - data_tree['Children'] = [] - depth = self.line.index('o') # to identify the other nodes that have the same parent - self.get_line() - - # check if its a leaf - if self.line[depth] != '|': - is_leaf = True - - # Fetch the data of the node - if not self.fetch_node_data(data_tree): - return # EOF + log_files = self.get_log_files() + data_tree = {} - # stop if we're a leaf - if is_leaf: - self.open_file.seek(self.rollback_addr) - return + for log_file 
in log_files: + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + parser = IORegStructParser() + data_tree = parser.get_dict(log_file) - # sanity check - self.not_empty_node_check() + except Exception: + logger.exception("IOService parsing crashed") - # Iterates over each child to call the current function - self.iterate_children(depth, data_tree['Children']) + return data_tree diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py new file mode 100644 index 0000000..a5e9eaf --- /dev/null +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -0,0 +1,9 @@ + + +class IORegStringParser: + def __init__(self): + pass + + def get_parsed(self, input_string: str): + list_of_elements = input_string.split(',') + return {"a": "b", "c": "d"} diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py new file mode 100644 index 0000000..ff06c10 --- /dev/null +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -0,0 +1,172 @@ +from sysdiagnose.utils.base import logger +from sysdiagnose.utils.ioreg_parsers.string_parser import IORegStringParser +import re + +class IORegStructParser: + rollback_addr = None + line = None + + def __init__(self): + pass + + def get_dict(self, file_path): + data_tree = {} + + with open(file_path, 'r', errors='backslashreplace') as f: + self.open_file = f + self.recursive_fun(data_tree) + + return data_tree + + def get_line(self): + self.rollback_addr = self.open_file.tell() + self.line = self.open_file.readline() + self.line = self.line.replace('\n', '') + + def recursive_call(self, data_tree: dict): + self.open_file.seek(self.rollback_addr) + self.recursive_fun(data_tree) + + def check_start_node(self): + if '+-o' not in self.line: + logger.error('This is not normal. Recursive function called on random line.') + exit(1) + + def not_empty_node_check(self): + if not self.rollback_addr: + logger.error("+-o in two consecutive lines, not supposed to be possible") + exit(1) + + def check_key_uniqueness(self, dictio: dict, key: str): + if dictio.get(key): + logger.warning('Key is already in dictionary, data may be lost') + + def fetch_node_data(self, data_tree: dict) -> bool: + node_data = [] # array of lines, to be transformed in json + res = True + + while '+-o' not in self.line: + if not self.line: # end of file + res = False + break + + node_data.append(self.line) + self.get_line() + + data_dict = self.node_data_to_json(node_data) + self.dict_update(data_tree, data_dict) + + return res + + def dict_update(self, main_dict, data_dict): + data_dict_len = len(data_dict) + main_dict_len = len(main_dict) + main_dict.update(data_dict) + + if len(main_dict) != data_dict_len + main_dict_len: + logger.warning("One of the keys was already present in the json, data loss may occur") + + def parse_title(self): + if "+-o" not in self.line: + logger.warning("'non-title' line given to title parser, should not happen") + return "" + + whole_title = self.line.split("+-o", 1)[1].strip() + + if " format, to invesstigate") + + name = whole_title.split(' bool: + """ + some values overflow on the few next lines + this condition assumes there is no '=' in the exceeding data + (which was the case up to what I saw) + + p.s. : if you wonder why cond4 is necessary, it is only for + the last leaf, which has no '|' symbols. 
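+        (the last leaf's data lines start with plain spaces instead of '|',
+        so cond1 alone cannot tell them apart from ordinary overflow lines)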
without cond4, + these lines would be seen as anomalies + """ + cond1 = not re.search(r'^\s*\|+', data) + cond2 = len(data.strip()) > 0 + cond3 = data.strip() not in ('{', '}') + cond4 = '=' not in data + + if cond1 and cond2 and cond3 and cond4: + dictio[key] += data.strip() + return True + return False + + def node_data_to_json(self, data_array: list[str]) -> dict: + res = {} + key = None + + for data in data_array: + self.handle_anomalies(res, data, key) + + # remove spaces and pipes at start + clean_line = re.sub(r'^(\s|\|)*', '', data) + + if '=' not in clean_line: + continue + + # split at the first equal only + key, value = clean_line.split('=', 1) + + # remove first and last " (in case the key has more quotes inside) + key = key.replace('"', '', 1) + key = key[::-1].replace('"', '', 1)[::-1] + key = key.strip() + + self.check_key_uniqueness(res, key) + res[key] = value.strip() + + return res + + def iterate_children(self, depth: int, data_tree: dict): + while self.line and (self.line[depth] == '|' or self.line[depth: depth + 3] == '+-o'): + if self.line[depth: depth + 3] == '+-o': + name = self.parse_title()[0] + self.check_key_uniqueness(data_tree, name) + data_tree[name] = {} + self.recursive_call(data_tree[name]) + + else: + self.get_line() + + def recursive_fun(self, data_tree: dict): + is_leaf = False + self.get_line() + + # check if we're at the start of a node + self.check_start_node() + + additional_data = self.parse_title()[1] + additional_data = IORegStringParser().get_parsed(additional_data) + self.dict_update(data_tree, additional_data) + + depth = self.line.index('o') # to identify the other nodes that have the same parent + self.get_line() + + # check if its a leaf + if self.line[depth] != '|': + is_leaf = True + + # Fetch the data of the node + if not self.fetch_node_data(data_tree): + return # EOF + + # stop if we're a leaf + if is_leaf: + self.open_file.seek(self.rollback_addr) + return + + # sanity check + self.not_empty_node_check() + + # Iterates over each child to call the current function + self.iterate_children(depth, data_tree) From 2de2208e0a3ba288b91a8d93aee21cb84cefd94a Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Wed, 17 Sep 2025 15:53:35 +0200 Subject: [PATCH 08/17] Added basic value string parsers, used for io* parsers --- src/sysdiagnose/parsers/ioservice.py | 4 +- .../utils/ioreg_parsers/string_parser.py | 66 +++++++++++++++++-- .../utils/ioreg_parsers/structure_parser.py | 10 +-- tests/test_string_parser.py | 0 4 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 tests/test_string_parser.py diff --git a/src/sysdiagnose/parsers/ioservice.py b/src/sysdiagnose/parsers/ioservice.py index b0c4ec3..26a84ef 100644 --- a/src/sysdiagnose/parsers/ioservice.py +++ b/src/sysdiagnose/parsers/ioservice.py @@ -35,8 +35,8 @@ def execute(self) -> list | dict: for log_file in log_files: try: logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) - parser = IORegStructParser() - data_tree = parser.get_dict(log_file) + p = IORegStructParser() + data_tree = p.parse(log_file) except Exception: logger.exception("IOService parsing crashed") diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index a5e9eaf..fed198d 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -1,9 +1,63 @@ +import re +from enum import Enum +class DataType(Enum): + XML_LIKE = 1 + LIST = 2 -class 
IORegStringParser: - def __init__(self): - pass - def get_parsed(self, input_string: str): - list_of_elements = input_string.split(',') - return {"a": "b", "c": "d"} +def parse_list(input_string: str): + input = re.search(r'\((.+,.+)\)', input_string).group(1) + list_of_elements = input.split(',') + res = [] + + for element in list_of_elements: + res.append(element.strip()) + + return res + + +def parse_xml_like(input_string: str): + input = re.search(r'<(.+)>', input_string).group(1) + list_of_elements = input.split(',') + res = {} + + for element in list_of_elements: + element = element.strip() + key = element.split(' ', 1)[0] + value = element.split(' ', 1)[1] + # TODO check key uniqueness + res[key] = value + + return res + +def detect_type(input: str) -> DataType: + if re.search(r'<.+>', input): + return DataType.XML_LIKE + + if re.search(r'\(.+,.+\)', input): + return DataType.LIST + +def parse(input_string: str): + input_string = input_string.strip() + type = detect_type(input_string) + + match type: + case DataType.XML_LIKE: + parse_xml_like(input_string) + + case DataType.LIST: + parse_list(input_string) + + case _: + print('not found') + + +test_1 = '' +test_2 = '' +test_3 = '' +test_4 = '' +test_5 = ', retain 52>' +test_6 = '' + +parse(test_5) diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py index ff06c10..0dc1a4e 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -1,5 +1,5 @@ from sysdiagnose.utils.base import logger -from sysdiagnose.utils.ioreg_parsers.string_parser import IORegStringParser +from sysdiagnose.utils.ioreg_parsers import string_parser import re class IORegStructParser: @@ -9,7 +9,7 @@ class IORegStructParser: def __init__(self): pass - def get_dict(self, file_path): + def parse(self, file_path): data_tree = {} with open(file_path, 'r', errors='backslashreplace') as f: @@ -39,7 +39,7 @@ def not_empty_node_check(self): def check_key_uniqueness(self, dictio: dict, key: str): if dictio.get(key): - logger.warning('Key is already in dictionary, data may be lost') + logger.warning('Key is already in dictionary, data may be lost\n\tKey : ' + key) def fetch_node_data(self, data_tree: dict) -> bool: node_data = [] # array of lines, to be transformed in json @@ -77,7 +77,7 @@ def parse_title(self): logger.warning("Title doesnt respect the usual format, to invesstigate") name = whole_title.split(' Date: Thu, 18 Sep 2025 15:38:15 +0200 Subject: [PATCH 09/17] Added recursive version of string_parser, still rudimentary --- .../utils/ioreg_parsers/string_parser.py | 163 +++++++++++++++--- 1 file changed, 138 insertions(+), 25 deletions(-) diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index fed198d..f4587ad 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -1,14 +1,102 @@ import re from enum import Enum +from sysdiagnose.utils.base import logger +import uuid class DataType(Enum): XML_LIKE = 1 LIST = 2 - - -def parse_list(input_string: str): - input = re.search(r'\((.+,.+)\)', input_string).group(1) - list_of_elements = input.split(',') + UNKNOWN = 3 + +class Detect: + _best_len = 0 + _best_type = DataType.UNKNOWN + _best_whole = "" # whole match, for example : + _best_content = "" # content, for example : data1, data2 + _found = False + + def __init__(self, input_string: 
str): + self.detect_type(input_string) + + def detect_type(self, input: str): + hit = re.search(r'<(.*)>', input) + if hit and len(hit.group(0)) > self.len: + self.assign_best(hit, DataType.XML_LIKE) + + hit = re.search(r'\((.+,.+)\)', input) + if hit and len(hit.group(0)) > self.len: + self.assign_best(hit, DataType.LIST) + + def assign_best(self, hit: re.Match, type: DataType): + self._best_len = len(hit.group(0)) + self._best_type = type + self._best_whole = hit.group(0) + self._best_content = hit.group(1) + self._found = True + + @property + def len(self) -> int: + return self._best_len + + @property + def type(self) -> DataType: + return self._best_type + + @property + def whole_match(self) -> str: + return self._best_whole + + @property + def content(self) -> str: + return self._best_content + + @property + def found(self) -> bool: + return self._found + + +def generate_tag() -> str: + return str(uuid.uuid4()) + +def check_anomaly(s: str, tag: str): + diff = s.replace(tag, '') + + if tag in s and diff: + logger.warning("Anomaly, some data was right next to " \ + "the struct (without space), this data is thus lost : ", diff) + +def list_replace(tagged_struct: list, tag: str, st: dict | list): + for i in range(len(tagged_struct)): + elem = tagged_struct[i] + if type(elem) == str and tag in elem: + check_anomaly(elem, tag) + tagged_struct[i] = st + +def dict_replace(tagged_struct: dict, tag: str, st: dict | list): + for key in tagged_struct: + elem = tagged_struct[key] + if type(elem) == str and tag in elem: + check_anomaly(elem, tag) + tagged_struct[key] = st + +def struct_replace(tagged_struct: dict | list, type: DataType, tag: str, st: dict | list): + try: + match type: + case DataType.LIST: + list_replace(tagged_struct, tag, st) + + case DataType.XML_LIKE: + dict_replace(tagged_struct, tag, st) + + case _: + pass + + except: + logger.error("When rebuilding the struct in struct_replace, the argument 'type' doesn't correspond to the given tagged_struct") + exit(1) + +def parse_list(input_string: str) -> list: + list_of_elements = input_string.split(',') res = [] for element in list_of_elements: @@ -16,10 +104,8 @@ def parse_list(input_string: str): return res - -def parse_xml_like(input_string: str): - input = re.search(r'<(.+)>', input_string).group(1) - list_of_elements = input.split(',') +def parse_xml_like(input_string: str) -> dict: + list_of_elements = input_string.split(',') res = {} for element in list_of_elements: @@ -31,33 +117,60 @@ def parse_xml_like(input_string: str): return res -def detect_type(input: str) -> DataType: - if re.search(r'<.+>', input): - return DataType.XML_LIKE - - if re.search(r'\(.+,.+\)', input): - return DataType.LIST - -def parse(input_string: str): - input_string = input_string.strip() - type = detect_type(input_string) - +def parse_type(input_string: str, type: DataType): match type: case DataType.XML_LIKE: - parse_xml_like(input_string) + return parse_xml_like(input_string) case DataType.LIST: - parse_list(input_string) + return parse_list(input_string) case _: print('not found') +def recursive_parse(input: str): + input = input.strip() + hit = Detect(input) + tagged_content = hit.content + tag_map = {} + + # recursion stop + if not hit.found: + return "", "" + + # recursion + sub_string, sub_struct = recursive_parse(hit.content) + + if not sub_string: + # form basic struct + tagged_struct = parse_type(tagged_content, hit.type) + return hit.whole_match, tagged_struct + + # replace struct by a unique tag + tag = generate_tag() + tagged_content = 
tagged_content.replace(sub_string, tag) + + # link tag with its computed struct + tag_map[tag] = sub_struct + + # form basic struct + tagged_struct = parse_type(tagged_content, hit.type) + + # include recursively computed struct + struct_replace(tagged_struct, hit.type, tag, tag_map[tag]) + + return hit.whole_match, tagged_struct + +def parse(input_string: str): + return recursive_parse(input_string)[1] + test_1 = '' test_2 = '' test_3 = '' test_4 = '' -test_5 = ', retain 52>' -test_6 = '' +test_5 = '' +test_6 = ', user , retain 52>' +test_7 = '' -parse(test_5) +print(parse(test_7)) From 49b1982f2b95235db7b0df0878557d0dadd60fec Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Thu, 18 Sep 2025 20:28:01 +0200 Subject: [PATCH 10/17] Refactor of algo logic in utils/string_parser --- .../utils/ioreg_parsers/string_parser.py | 154 ++++++++++-------- 1 file changed, 86 insertions(+), 68 deletions(-) diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index f4587ad..0036386 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -9,7 +9,7 @@ class DataType(Enum): UNKNOWN = 3 class Detect: - _best_len = 0 + _best_len = float('inf') _best_type = DataType.UNKNOWN _best_whole = "" # whole match, for example : _best_content = "" # content, for example : data1, data2 @@ -19,12 +19,14 @@ def __init__(self, input_string: str): self.detect_type(input_string) def detect_type(self, input: str): - hit = re.search(r'<(.*)>', input) - if hit and len(hit.group(0)) > self.len: + # find the smallest + hit = re.search(r'<((?!.*<.*).*?)>', input) + if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.XML_LIKE) - hit = re.search(r'\((.+,.+)\)', input) - if hit and len(hit.group(0)) > self.len: + # find the smallest + hit = re.search(r'\(((?!.*\(.*).+?,.+?)\)', input) + if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.LIST) def assign_best(self, hit: re.Match, type: DataType): @@ -37,19 +39,19 @@ def assign_best(self, hit: re.Match, type: DataType): @property def len(self) -> int: return self._best_len - + @property def type(self) -> DataType: return self._best_type - + @property def whole_match(self) -> str: return self._best_whole - + @property def content(self) -> str: return self._best_content - + @property def found(self) -> bool: return self._found @@ -58,42 +60,19 @@ def found(self) -> bool: def generate_tag() -> str: return str(uuid.uuid4()) + def check_anomaly(s: str, tag: str): diff = s.replace(tag, '') if tag in s and diff: - logger.warning("Anomaly, some data was right next to " \ - "the struct (without space), this data is thus lost : ", diff) + logger.warning("Warning : Anomaly: some data was right next to " + "the struct (without space), this data is thus lost\n---> " + diff) -def list_replace(tagged_struct: list, tag: str, st: dict | list): - for i in range(len(tagged_struct)): - elem = tagged_struct[i] - if type(elem) == str and tag in elem: - check_anomaly(elem, tag) - tagged_struct[i] = st -def dict_replace(tagged_struct: dict, tag: str, st: dict | list): - for key in tagged_struct: - elem = tagged_struct[key] - if type(elem) == str and tag in elem: - check_anomaly(elem, tag) - tagged_struct[key] = st - -def struct_replace(tagged_struct: dict | list, type: DataType, tag: str, st: dict | list): - try: - match type: - case DataType.LIST: - list_replace(tagged_struct, tag, st) - - case DataType.XML_LIKE: - 
dict_replace(tagged_struct, tag, st) - - case _: - pass - - except: - logger.error("When rebuilding the struct in struct_replace, the argument 'type' doesn't correspond to the given tagged_struct") - exit(1) +def check_key_uniqueness(dictio: dict, key: str): + if dictio.get(key): + logger.warning('Warning : Key is already in dictionary, data may be lost\n---> ' + key) + def parse_list(input_string: str) -> list: list_of_elements = input_string.split(',') @@ -104,6 +83,7 @@ def parse_list(input_string: str) -> list: return res + def parse_xml_like(input_string: str) -> dict: list_of_elements = input_string.split(',') res = {} @@ -112,11 +92,12 @@ def parse_xml_like(input_string: str) -> dict: element = element.strip() key = element.split(' ', 1)[0] value = element.split(' ', 1)[1] - # TODO check key uniqueness + check_key_uniqueness(res, key) res[key] = value return res + def parse_type(input_string: str, type: DataType): match type: case DataType.XML_LIKE: @@ -128,41 +109,77 @@ def parse_type(input_string: str, type: DataType): case _: print('not found') -def recursive_parse(input: str): - input = input.strip() - hit = Detect(input) - tagged_content = hit.content - tag_map = {} - # recursion stop - if not hit.found: - return "", "" +def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): + for key in final_struct: + elem = final_struct[key] - # recursion - sub_string, sub_struct = recursive_parse(hit.content) + if isinstance(elem, str) and tag in elem: + check_anomaly(elem, tag) + final_struct[key] = constructed + return True - if not sub_string: - # form basic struct - tagged_struct = parse_type(tagged_content, hit.type) - return hit.whole_match, tagged_struct + elif isinstance(elem, list): + if resolve_tag_list(elem, tag, constructed): + return True + + return False - # replace struct by a unique tag - tag = generate_tag() - tagged_content = tagged_content.replace(sub_string, tag) - # link tag with its computed struct - tag_map[tag] = sub_struct +def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): + for i in range(len(final_struct)): + elem = final_struct[i] + + if isinstance(elem, str) and tag in elem: + check_anomaly(elem, tag) + final_struct[i] = constructed + return True + + elif isinstance(elem, dict): + if resolve_tag_dict(elem, tag, constructed): + return True + + return False + + +def resolve_tag(final_struct: dict | list, tag: str, constructed: dict | list): + if isinstance(final_struct, dict): + resolve_tag_dict(final_struct, tag, constructed) + + elif isinstance(final_struct, list): + resolve_tag_list(final_struct, tag, constructed) + + else: + logger.error('Error : struct type not found') + exit(1) + + +def parse(data_string: str): + data_string = data_string.strip() + hit = Detect(data_string) + final_struct = None + + # recursion stop + if not hit.found: + return None # form basic struct - tagged_struct = parse_type(tagged_content, hit.type) + constructed = parse_type(hit.content, hit.type) + + # replace struct by an unique tag + tag = generate_tag() + data_string = data_string.replace(hit.whole_match, tag) - # include recursively computed struct - struct_replace(tagged_struct, hit.type, tag, tag_map[tag]) + # recursion + final_struct = parse(data_string) - return hit.whole_match, tagged_struct + # reconstruct data structure + if not final_struct: + final_struct = constructed # at the root + else: + resolve_tag(final_struct, tag, constructed) -def parse(input_string: str): - return recursive_parse(input_string)[1] + return 
final_struct test_1 = '' @@ -170,7 +187,8 @@ def parse(input_string: str): test_3 = '' test_4 = '' test_5 = '' -test_6 = ', user , retain 52>' -test_7 = '' +test_6 = ', 0x300000), retain 52>' +test_7 = ', user , retain 52>' +test_8 = '), otherkey (otherval, otherval2)>' -print(parse(test_7)) +print(parse(test_8)) From 61d386ca1ad76431207bcf4ae4526ff9f3288dfc Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Fri, 19 Sep 2025 17:38:40 +0200 Subject: [PATCH 11/17] Enhanced string_parser + Added solid tests + fixes here and there --- .../utils/ioreg_parsers/string_parser.py | 87 ++++++++++++++----- .../utils/ioreg_parsers/structure_parser.py | 3 +- tests/test_string_parser.py | 60 +++++++++++++ 3 files changed, 125 insertions(+), 25 deletions(-) diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index 0036386..5242b9d 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -6,7 +6,8 @@ class DataType(Enum): XML_LIKE = 1 LIST = 2 - UNKNOWN = 3 + STRING = 3 + UNKNOWN = 4 class Detect: _best_len = float('inf') @@ -19,16 +20,34 @@ def __init__(self, input_string: str): self.detect_type(input_string) def detect_type(self, input: str): - # find the smallest - hit = re.search(r'<((?!.*<.*).*?)>', input) + """ Note on the match types + + XML_LIKE : data inside < > + + LIST : data in parentheses with at least one comma + + STRING : parentheses that dont contain any comma. + example : I'm good at coding (not really) <-- shouldn't be a list, simply text + + """ # noqa: W605 + + # find xml like dict ex : + hit = re.search(r'<([^<>]*)>', input) if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.XML_LIKE) - # find the smallest - hit = re.search(r'\(((?!.*\(.*).+?,.+?)\)', input) + # find list in parentheses ex : (a, b, c) + hit = re.search(r'\(([^()]*,[^()]*)\)', input) if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.LIST) + # find simple parentheses without ',' ex : (hello world) + hit = re.search(r'(\([^,)(]*\))', input) + if hit and len(hit.group(0)) < self._best_len: + self.assign_best(hit, DataType.STRING) + + self.warn_unknown_struct(input) + def assign_best(self, hit: re.Match, type: DataType): self._best_len = len(hit.group(0)) self._best_type = type @@ -36,6 +55,18 @@ def assign_best(self, hit: re.Match, type: DataType): self._best_content = hit.group(1) self._found = True + def warn_unknown_struct(self, input: str): + main_cond = self._best_type is DataType.UNKNOWN + cond_1 = '<' in input and '>' in input + cond_2 = '(' in input and ')' in input + cond_3 = '[' in input and ']' in input + cond_4 = '{' in input and '}' in input + + if (main_cond and (cond_1 or cond_2 or cond_3 or cond_4)): + logger.warning('Warning : A structure might have been recognized ' + 'in here, if so please consider adding it to the ' + 'string_parser.py file\n---> ' + input) + @property def len(self) -> int: return self._best_len @@ -91,7 +122,8 @@ def parse_xml_like(input_string: str) -> dict: for element in list_of_elements: element = element.strip() key = element.split(' ', 1)[0] - value = element.split(' ', 1)[1] + value = element.split(' ', 1)[1].strip() + # TODO if only a key is present, add true as value check_key_uniqueness(res, key) res[key] = value @@ -106,8 +138,12 @@ def parse_type(input_string: str, type: DataType): case DataType.LIST: return parse_list(input_string) + case DataType.STRING: + return input_string + case 
_: - print('not found') + logger.error("Error : Type not found in parse_type(). (Note : " + "you probably forgot to add it to the match case)") def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): @@ -115,14 +151,21 @@ def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): elem = final_struct[key] if isinstance(elem, str) and tag in elem: - check_anomaly(elem, tag) - final_struct[key] = constructed + if isinstance(constructed, str): + final_struct[key] = final_struct[key].replace(tag, constructed) + else: + check_anomaly(elem, tag) + final_struct[key] = constructed return True elif isinstance(elem, list): if resolve_tag_list(elem, tag, constructed): return True + elif isinstance(elem, dict): + if resolve_tag_dict(elem, tag, constructed): + return True + return False @@ -130,11 +173,19 @@ def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): for i in range(len(final_struct)): elem = final_struct[i] + # TODO repetition with resolve_tag_dict, put in a func if isinstance(elem, str) and tag in elem: - check_anomaly(elem, tag) - final_struct[i] = constructed + if isinstance(constructed, str): + final_struct[i] = final_struct[i].replace(tag, constructed) + else: + check_anomaly(elem, tag) + final_struct[i] = constructed return True + elif isinstance(elem, list): + if resolve_tag_list(elem, tag, constructed): + return True + elif isinstance(elem, dict): if resolve_tag_dict(elem, tag, constructed): return True @@ -168,7 +219,7 @@ def parse(data_string: str): # replace struct by an unique tag tag = generate_tag() - data_string = data_string.replace(hit.whole_match, tag) + data_string = data_string.replace(hit.whole_match, tag, 1) # recursion final_struct = parse(data_string) @@ -180,15 +231,3 @@ def parse(data_string: str): resolve_tag(final_struct, tag, constructed) return final_struct - - -test_1 = '' -test_2 = '' -test_3 = '' -test_4 = '' -test_5 = '' -test_6 = ', 0x300000), retain 52>' -test_7 = ', user , retain 52>' -test_8 = '), otherkey (otherval, otherval2)>' - -print(parse(test_8)) diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py index 0dc1a4e..ec655e6 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -54,6 +54,7 @@ def fetch_node_data(self, data_tree: dict) -> bool: self.get_line() data_dict = self.node_data_to_json(node_data) + # TODO parse each value self.dict_update(data_tree, data_dict) return res @@ -146,7 +147,7 @@ def recursive_fun(self, data_tree: dict): self.check_start_node() additional_data = self.parse_title()[1] - additional_data = string_parser.get_parsed(additional_data) + additional_data = string_parser.parse(additional_data) self.dict_update(data_tree, additional_data) depth = self.line.index('o') # to identify the other nodes that have the same parent diff --git a/tests/test_string_parser.py b/tests/test_string_parser.py index e69de29..f82cf02 100644 --- a/tests/test_string_parser.py +++ b/tests/test_string_parser.py @@ -0,0 +1,60 @@ +from tests import SysdiagnoseTestCase +import unittest +import sysdiagnose.utils.ioreg_parsers.string_parser as sp + + +class TestStringParser(SysdiagnoseTestCase): + + test_list = [ + '', + '(li1, li2, li3, li4)', + '< k1 v1 , k2 v2, k3 v3 ,k4 v4 >', + '>', + '( li 1, li 2 , li3)', + '', + '', + ' , k4 (li111, li222, li333) >', + '), m >', + ' ,k3 (>, (li111), (li8, li9)) , k4 (li111, li222, li333) >' + ] + + 
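These inputs are paired index-by-index with the expected structures listed next, so the two tables must stay the same length. A hedged variant of the test loop that makes any drift obvious and keeps reporting past the first failure (assuming SysdiagnoseTestCase is a unittest.TestCase, and sp imported as in this file):

    def test_parsing(self):
        # fail fast if the fixture tables fall out of sync
        self.assertEqual(len(self.test_list), len(self.expected_parsed))
        for test_val, expected in zip(self.test_list, self.expected_parsed):
            # subTest reports every failing input, not just the first one
            with self.subTest(input=test_val):
                self.assertEqual(sp.parse(test_val), expected)

assertEqual also prints a structural diff on failure, which assertTrue(result == expected) cannot do.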
expected_parsed = [ + {'key': 'val', 'k2': 'v2'}, + ['li1', 'li2', 'li3', 'li4'], + {'k1': 'v1', 'k2': 'v2', 'k3': 'v3', 'k4': 'v4'}, + {'k1': {'k11': 'v11'}}, + ['li 1', 'li 2', 'li3'], + {'k1': 'v1', 'k2': 'v2', 'k:3': ['li1', 'li2', 'li3', 'li4'], 'k4': 'v4'}, + {'k1': 'v1', 'k2': ['li1', 'li2', 'li3', 'li4'], 'k3': ['li11', 'li22'], 'k4': ['li111', 'li222', 'li333']}, + {'k1': 'v1', 'k2': ['li1', 'li2', 'li3', 'li4'], 'k3': {'k11': 'v11', 'k22': 'v22'}, 'k4': ['li111', 'li222', 'li333']}, + {'l': ['1', '2', {'k': ['', '', '', '']}], 'm': {'g': '()', 'k': ['', ''], 'm': ['()', '(())']}}, + {'k1': 'v1', 'k2': {'k11': 'v11', 'k22': 'v22'}, 'k3': [{'k111': {'a': 'b', 'c': ['l1', 'l2']}}, '(li111)', ['li8', 'li9']], 'k4': ['li111', 'li222', 'li333']} + ] + + expected_detect = [ + ('key val, k2 v2', sp.DataType.XML_LIKE), + ('li1, li2, li3, li4', sp.DataType.LIST), + (' k1 v1 , k2 v2, k3 v3 ,k4 v4 ', sp.DataType.XML_LIKE), + ('k11 v11', sp.DataType.XML_LIKE), + (' li 1, li 2 , li3', sp.DataType.LIST), + ('li1 , li2 ,li3, li4 ', sp.DataType.LIST), + ('li1 , li2 ,li3, li4 ', sp.DataType.LIST), + ('k11 v11,k22 v22', sp.DataType.XML_LIKE), + ('()', sp.DataType.STRING), + ('(li111)', sp.DataType.STRING) + ] + + def test_detect(self): + for test_val, (exp_cont, exp_type) in zip(self.test_list, self.expected_detect): + d = sp.Detect(test_val) + self.assertTrue(d.content == exp_cont) + self.assertTrue(d.type == exp_type) + + def test_parsing(self): + for test_val, expected in zip(self.test_list, self.expected_parsed): + result = sp.parse(test_val) + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() From 5c81d1cc983f86fc397c692d7f51f095d4cbada0 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Mon, 22 Sep 2025 13:34:36 +0200 Subject: [PATCH 12/17] string_parser now fully working on the testfile, quoted values not optimized yet --- .../utils/ioreg_parsers/string_parser.py | 128 ++++++++++++++---- .../utils/ioreg_parsers/structure_parser.py | 60 ++++++-- tests/test_string_parser.py | 8 +- 3 files changed, 157 insertions(+), 39 deletions(-) diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index 5242b9d..3fc928a 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -4,10 +4,11 @@ import uuid class DataType(Enum): - XML_LIKE = 1 - LIST = 2 - STRING = 3 - UNKNOWN = 4 + XML_DICT = 1 + CURLY_DICT = 2 + LIST = 3 + STRING = 4 + UNKNOWN = 5 class Detect: _best_len = float('inf') @@ -22,7 +23,10 @@ def __init__(self, input_string: str): def detect_type(self, input: str): """ Note on the match types - XML_LIKE : data inside < > + XML_DICT : data inside <> + excluded : <> , < > , < > + + CURLY_DICT : like xml_dict but with {} instead of <> LIST : data in parentheses with at least one comma @@ -32,12 +36,17 @@ def detect_type(self, input: str): """ # noqa: W605 # find xml like dict ex : - hit = re.search(r'<([^<>]*)>', input) + hit = self.find_smallest(r'<([^<>]*[^\s<>][^<>]*)>', input) + if hit and len(hit.group(0)) < self._best_len: + self.assign_best(hit, DataType.XML_DICT) + + # find dict in {} ex : {key1=val1, k2=v2} + hit = self.find_smallest(r'{([^{}]*)}', input) if hit and len(hit.group(0)) < self._best_len: - self.assign_best(hit, DataType.XML_LIKE) + self.assign_best(hit, DataType.CURLY_DICT) # find list in parentheses ex : (a, b, c) - hit = re.search(r'\(([^()]*,[^()]*)\)', input) + hit = 
self.find_smallest(r'\(([^()]*,[^()]*)\)', input)
         if hit and len(hit.group(0)) < self._best_len:
             self.assign_best(hit, DataType.LIST)
 
@@ -46,6 +55,16 @@ def detect_type(self, input: str):
         if hit and len(hit.group(0)) < self._best_len:
             self.assign_best(hit, DataType.STRING)
 
+        # find text in square brackets, without ',' nor '=' ex : [hello world]
+        hit = re.search(r'(\[[^,=\[\]]*\])', input)
+        if hit and len(hit.group(0)) < self._best_len:
+            self.assign_best(hit, DataType.STRING)
+
+        # find simple double-quotes ex : "hello world"
+        hit = re.search(r'("[^"]*")', input)
+        if hit and len(hit.group(0)) < self._best_len:
+            self.assign_best(hit, DataType.STRING)
+
         self.warn_unknown_struct(input)
 
     def assign_best(self, hit: re.Match, type: DataType):
@@ -55,14 +74,21 @@
         self._best_content = hit.group(1)
         self._found = True
 
+    def find_smallest(self, regex: str, data: str):
+        pattern = re.compile(regex)
+        matches = list(pattern.finditer(data))
+        if not matches:
+            return None
+        return min(matches, key=lambda m: len(m.group(0)))
+
     def warn_unknown_struct(self, input: str):
         main_cond = self._best_type is DataType.UNKNOWN
+        cond_exceptions = input != '{}' and input != '<>' and input != '()'
         cond_1 = '<' in input and '>' in input
         cond_2 = '(' in input and ')' in input
-        cond_3 = '[' in input and ']' in input
-        cond_4 = '{' in input and '}' in input
+        cond_3 = '{' in input and '}' in input
 
-        if (main_cond and (cond_1 or cond_2 or cond_3 or cond_4)):
+        if (main_cond and cond_exceptions and (cond_1 or cond_2 or cond_3)):
             logger.warning('Warning : A structure might have been recognized '
                            'in here, if so please consider adding it to the '
                            'string_parser.py file\n---> ' + input)
 
@@ -94,11 +120,17 @@ def generate_tag() -> str:
 
 def check_anomaly(s: str, tag: str):
     diff = s.replace(tag, '')
+    structured = s.replace(tag, '[STRUCT]')
+    # cases we don't have to warn about.
ex : (()) is same as - if tag in s and diff: + if tag in s and diff and not is_redundent_syntax_regex(diff): logger.warning("Warning : Anomaly: some data was right next to " - "the struct (without space), this data is thus lost\n---> " + diff) + "the struct (without space), this data is thus lost\n---> " + structured) +def is_redundent_syntax_regex(s: str): + """ If we have for example ([ ]) around a struct, we consider it useless + Example : "[()]" is the same as """ + return re.search(r'^[(){}\[\]<>""]+$', s) def check_key_uniqueness(dictio: dict, key: str): if dictio.get(key): @@ -115,15 +147,27 @@ def parse_list(input_string: str) -> list: return res -def parse_xml_like(input_string: str) -> dict: +def parse_dict(input_string: str, separator: str) -> dict: list_of_elements = input_string.split(',') res = {} + if list_of_elements == ['']: + return res + for element in list_of_elements: element = element.strip() - key = element.split(' ', 1)[0] - value = element.split(' ', 1)[1].strip() - # TODO if only a key is present, add true as value + splitted = element.split(separator, 1) + key = splitted[0] + + # value is true/false if there is only a key + if len(splitted) > 1: + value = splitted[1].strip() + elif key[0] == '!': + value = 'false' + key = key[1:] + else: + value = 'true' + check_key_uniqueness(res, key) res[key] = value @@ -132,8 +176,11 @@ def parse_xml_like(input_string: str) -> dict: def parse_type(input_string: str, type: DataType): match type: - case DataType.XML_LIKE: - return parse_xml_like(input_string) + case DataType.XML_DICT: + return parse_dict(input_string, ' ') + + case DataType.CURLY_DICT: + return parse_dict(input_string, '=') case DataType.LIST: return parse_list(input_string) @@ -158,6 +205,17 @@ def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): final_struct[key] = constructed return True + elif isinstance(key, str) and tag in key: + if isinstance(constructed, str): + new_key = key.replace(tag, constructed) + value = final_struct[key] + del final_struct[key] + final_struct[new_key] = value + else: + logger.error("Error : Trying to use a struct as a key in a dict") + final_struct[key] = constructed + return True + elif isinstance(elem, list): if resolve_tag_list(elem, tag, constructed): return True @@ -193,20 +251,44 @@ def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): return False -def resolve_tag(final_struct: dict | list, tag: str, constructed: dict | list): +def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | list | str): if isinstance(final_struct, dict): resolve_tag_dict(final_struct, tag, constructed) elif isinstance(final_struct, list): resolve_tag_list(final_struct, tag, constructed) + # TODO struct in string doesnt work, for example () + elif isinstance(final_struct, str): + if not isinstance(constructed, str): + if final_struct.replace(tag, "") == '()': + final_struct = constructed + else: + user_friendly = final_struct.replace(tag, "[STRUCT]") + lost_data = final_struct.replace(tag, "") + if not is_redundent_syntax_regex(lost_data): + logger.warning("Warning : trying to incorporate dict/list in a string :\n---> " + user_friendly) + final_struct = constructed + else: + final_struct = final_struct.replace(tag, constructed) + else: logger.error('Error : struct type not found') exit(1) + # return is necessary bcs strings are not passed by reference in python + return final_struct + -def parse(data_string: str): +def parse(data_string: str, first_run: bool = True): 
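The key-only convention in parse_dict above is easy to miss: a bare key becomes 'true' and a '!'-prefixed key becomes 'false'. Two worked calls, assuming parse_dict exactly as defined in this patch (the concrete keys and values are invented for illustration):

    parse_dict('depth 1, wake, !sleep', ' ')
    # -> {'depth': '1', 'wake': 'true', 'sleep': 'false'}

    parse_dict('IOClass=IOUSBHostDevice, locationID=0x1234', '=')
    # -> {'IOClass': 'IOUSBHostDevice', 'locationID': '0x1234'}

The same function covers both the XML-style dict (space separator) and the curly dict ('=' separator), which is why parse_type only differs in the separator it passes.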
data_string = data_string.strip() + # if first_run: print('========= ' + data_string) + + # dont parse if too long + if first_run and len(data_string) > 10000: + logger.warning('Skipped a too long lines with ' + str(len(data_string)) + ' characters') + return data_string + hit = Detect(data_string) final_struct = None @@ -222,12 +304,12 @@ def parse(data_string: str): data_string = data_string.replace(hit.whole_match, tag, 1) # recursion - final_struct = parse(data_string) + final_struct = parse(data_string, False) # reconstruct data structure if not final_struct: final_struct = constructed # at the root else: - resolve_tag(final_struct, tag, constructed) + final_struct = resolve_tag(final_struct, tag, constructed) return final_struct diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py index ec655e6..016ce3f 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -54,18 +54,30 @@ def fetch_node_data(self, data_tree: dict) -> bool: self.get_line() data_dict = self.node_data_to_json(node_data) - # TODO parse each value + # TODO test this + self.parse_values(data_dict) self.dict_update(data_tree, data_dict) return res - def dict_update(self, main_dict, data_dict): - data_dict_len = len(data_dict) - main_dict_len = len(main_dict) - main_dict.update(data_dict) + def parse_values(self, data_dict: dict): + for key in data_dict: + value = data_dict[key] + constructed = string_parser.parse(value) + if constructed: + data_dict[key] = constructed - if len(main_dict) != data_dict_len + main_dict_len: - logger.warning("One of the keys was already present in the json, data loss may occur") + def dict_update(self, main_dict, data_dict): + """ Redefining the dict.update function to handle key collisions """ + + for key in data_dict: + if main_dict.get(key): + if isinstance(main_dict[key], list): + main_dict[key].append(data_dict[key]) + else: + main_dict[key] = [main_dict[key], data_dict[key]] + else: + main_dict[key] = data_dict[key] def parse_title(self): if "+-o" not in self.line: @@ -82,6 +94,10 @@ def parse_title(self): return name, data + def warn_if_no_struct(self, data: str | dict | list): + if isinstance(data, str): + logger.warning("No struct found in a title, should always have one\n---> " + data) + def handle_anomalies(self, dictio: dict, data: str, key: str) -> bool: """ some values overflow on the few next lines @@ -132,13 +148,30 @@ def iterate_children(self, depth: int, data_tree: dict): while self.line and (self.line[depth] == '|' or self.line[depth: depth + 3] == '+-o'): if self.line[depth: depth + 3] == '+-o': name = self.parse_title()[0] - self.check_key_uniqueness(data_tree, name) - data_tree[name] = {} - self.recursive_call(data_tree[name]) + new_child = self.setup_new_child(data_tree, name) + self.recursive_call(new_child) else: self.get_line() + def setup_new_child(self, data_tree, key): + """ This function is dedicated to iterate_child, it handles the special cases + where a node name is already present for the same parent """ + + if data_tree.get(key): + if isinstance(data_tree[key], list): + # case already list of data nodes + data_tree[key].append({}) + else: + # case currently single data node + data_tree[key] = [data_tree[key], {}] + return data_tree[key][-1] + + else: + # case new key + data_tree[key] = {} + return data_tree[key] + def recursive_fun(self, data_tree: dict): is_leaf = False self.get_line() @@ -146,8 +179,11 @@ 
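The redefined dict_update above quietly changes a value's type on collision: the first duplicate key turns the value into a two-element list, and later duplicates append to it. A standalone sketch of that contract (merge_key is a hypothetical name):

    def merge_key(tree: dict, key: str, value):
        """First write stores the value; repeated keys promote it to a list."""
        if key not in tree:
            tree[key] = value
        elif isinstance(tree[key], list):
            tree[key].append(value)
        else:
            tree[key] = [tree[key], value]

    tree = {}
    for v in ('a', 'b', 'c'):
        merge_key(tree, 'name', v)
    # tree == {'name': ['a', 'b', 'c']}

One difference worth noting: the sketch tests membership, while the patch's main_dict.get(key) treats falsy values such as '' or {} as absent, so a duplicate of an empty value would overwrite it instead of building a list.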
def recursive_fun(self, data_tree: dict):
 
         # check if we're at the start of a node
         self.check_start_node()
 
-        additional_data = self.parse_title()[1]
-        additional_data = string_parser.parse(additional_data)
+        # try to get a struct out of the data
+        title_data = self.parse_title()[1]
+        additional_data = string_parser.parse(title_data) or title_data
+        self.warn_if_no_struct(additional_data)
+
         self.dict_update(data_tree, additional_data)
 
         depth = self.line.index('o')  # to identify the other nodes that have the same parent
diff --git a/tests/test_string_parser.py b/tests/test_string_parser.py
index f82cf02..8193f80 100644
--- a/tests/test_string_parser.py
+++ b/tests/test_string_parser.py
@@ -32,14 +32,14 @@ class TestStringParser(SysdiagnoseTestCase):
     ]
 
     expected_detect = [
-        ('key val, k2 v2', sp.DataType.XML_LIKE),
+        ('key val, k2 v2', sp.DataType.XML_DICT),
         ('li1, li2, li3, li4', sp.DataType.LIST),
-        (' k1 v1 , k2 v2, k3 v3 ,k4 v4 ', sp.DataType.XML_LIKE),
-        ('k11 v11', sp.DataType.XML_LIKE),
+        (' k1 v1 , k2 v2, k3 v3 ,k4 v4 ', sp.DataType.XML_DICT),
+        ('k11 v11', sp.DataType.XML_DICT),
         (' li 1, li 2 , li3', sp.DataType.LIST),
         ('li1 , li2 ,li3, li4 ', sp.DataType.LIST),
         ('li1 , li2 ,li3, li4 ', sp.DataType.LIST),
-        ('k11 v11,k22 v22', sp.DataType.XML_LIKE),
+        ('k11 v11,k22 v22', sp.DataType.XML_DICT),
         ('()', sp.DataType.STRING),
         ('(li111)', sp.DataType.STRING)
     ]

From 12c566ec499ab48d0dfdece31bcb96b927efd251 Mon Sep 17 00:00:00 2001
From: Aweinhof
Date: Tue, 23 Sep 2025 07:50:37 +0200
Subject: [PATCH 13/17] Added a line preparation that greatly optimizes the
 recursion depth + some cleanup

---
 .../utils/ioreg_parsers/string_parser.py | 118 +++++++++++++++---
 tests/test_string_parser.py              |   1 +
 2 files changed, 105 insertions(+), 14 deletions(-)

diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py
index 3fc928a..cec9f98 100644
--- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py
+++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py
@@ -1,5 +1,6 @@
 import re
 from enum import Enum
+import sys
 from sysdiagnose.utils.base import logger
 import uuid
 
@@ -132,6 +133,50 @@ def is_redundent_syntax_regex(s: str):
     Example : "[()]" is the same as """
     return re.search(r'^[(){}\[\]<>""]+$', s)
 
+def prepare_line(line: str) -> str:
+    """ remove unnecessary double-quotes
+    quotes are needed when a comma is inside.
+    example :
+         !=
+
+    Note : regex can't be used, this needs to be stateful, i.e.
consider opening and closing quotes + example that doesnt work with regex: "a,"b"c," + gives : '"a,bc,"' + should give : '"a,"b"c,"' + (the quotes in "a," aren't removed bcs of the comma, so "b" is detected as a string) + """ + inside = False + opening_pos = None + skipping = False + parse_char = (',', '=', '{', '}', '(', ')') + line = line.strip() + + i = 0 + while i < len(line): + if line[i] == '"': + if inside: + if not skipping: + line = line[:i] + line[i + 1:] # remove last " + line = line[:opening_pos] + line[opening_pos + 1:] # remove first " + i -= 1 + else: + i += 1 + inside = False + + else: + inside = True + opening_pos = i + skipping = False + i += 1 + continue + + if inside and line[i] in parse_char: + skipping = True + + i += 1 + + return line + def check_key_uniqueness(dictio: dict, key: str): if dictio.get(key): logger.warning('Warning : Key is already in dictionary, data may be lost\n---> ' + key) @@ -196,6 +241,7 @@ def parse_type(input_string: str, type: DataType): def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): for key in final_struct: elem = final_struct[key] + #return resolve_tag_list_dict(final_struct, elem, key, tag, constructed) if isinstance(elem, str) and tag in elem: if isinstance(constructed, str): @@ -223,6 +269,35 @@ def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): elif isinstance(elem, dict): if resolve_tag_dict(elem, tag, constructed): return True + return False + +def resolve_tag_list_dict(final_struct: list | dict, elem: list | dict | str, key: str, tag: str, constructed: dict | list | str): + if isinstance(elem, str) and tag in elem: + if isinstance(constructed, str): + final_struct[key] = final_struct[key].replace(tag, constructed) + else: + check_anomaly(elem, tag) + final_struct[key] = constructed + return True + + elif isinstance(key, str) and tag in key: # only for dict, key is int for list + if isinstance(constructed, str): + new_key = key.replace(tag, constructed) + value = final_struct[key] + del final_struct[key] + final_struct[new_key] = value + else: + logger.error("Error : Trying to use a struct as a key in a dict") + final_struct[key] = constructed + return True + + elif isinstance(elem, list): + if resolve_tag_list(elem, tag, constructed): + return True + + elif isinstance(elem, dict): + if resolve_tag_dict(elem, tag, constructed): + return True return False @@ -230,8 +305,8 @@ def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): for i in range(len(final_struct)): elem = final_struct[i] + #return resolve_tag_list_dict(final_struct, elem, i, tag, constructed) - # TODO repetition with resolve_tag_dict, put in a func if isinstance(elem, str) and tag in elem: if isinstance(constructed, str): final_struct[i] = final_struct[i].replace(tag, constructed) @@ -247,9 +322,7 @@ def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): elif isinstance(elem, dict): if resolve_tag_dict(elem, tag, constructed): return True - - return False - + return False def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | list | str): if isinstance(final_struct, dict): @@ -258,7 +331,6 @@ def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | l elif isinstance(final_struct, list): resolve_tag_list(final_struct, tag, constructed) - # TODO struct in string doesnt work, for example () elif isinstance(final_struct, str): if not 
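prepare_line's quote scanner is stateful on purpose, as its docstring explains. A few worked cases make the intended behaviour concrete (expected results assume the implementation above, where a quoted span keeps its quotes only if it contains one of ',', '=', '{', '}', '(' or ')'):

    prepare_line('"simple" data')   # -> 'simple data'     harmless quotes dropped
    prepare_line('"a,b" data')      # -> '"a,b" data'      comma inside: quotes kept
    prepare_line('"a,"b"c,"')       # -> '"a,"b"c,"'       the stateful case from the docstring

Dropping the harmless quotes up front is what shrinks the recursion depth later: every quoted token would otherwise be detected as a STRING structure and cost one recursion level of its own.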
isinstance(constructed, str): if final_struct.replace(tag, "") == '()': @@ -280,15 +352,10 @@ def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | l return final_struct -def parse(data_string: str, first_run: bool = True): - data_string = data_string.strip() - # if first_run: print('========= ' + data_string) - - # dont parse if too long - if first_run and len(data_string) > 10000: - logger.warning('Skipped a too long lines with ' + str(len(data_string)) + ' characters') - return data_string +def parse_main_loop(data_string: str, depth: dict): + depth['value'] += 1 + # Detection hit = Detect(data_string) final_struct = None @@ -304,7 +371,7 @@ def parse(data_string: str, first_run: bool = True): data_string = data_string.replace(hit.whole_match, tag, 1) # recursion - final_struct = parse(data_string, False) + final_struct = parse_main_loop(data_string, depth) # reconstruct data structure if not final_struct: @@ -313,3 +380,26 @@ def parse(data_string: str, first_run: bool = True): final_struct = resolve_tag(final_struct, tag, constructed) return final_struct + +def parse(data_string: str): + # make it a struct so it is passed by reference + depth = {'value': 0} + + # increase recursion depth, default is at 1000 + sys.setrecursionlimit(3000) + + # greatly reduce recursion depth i.e. 80 000+ chars parsed against max 10 000 chars before + data_string = prepare_line(data_string) + + try: + data_string = parse_main_loop(data_string, depth) + except RecursionError: + logger.warning("Skipped line with " + str(len(data_string)) + " characters. " + "Recursion depth : " + str(depth['value']) + "\n" + "--> max recursion depth can be increased in utils/string_parser.py" + " in parse(). Feel free to try as high as needed to parse this line.") + + return data_string + + +print(parse('')) diff --git a/tests/test_string_parser.py b/tests/test_string_parser.py index 8193f80..11f123b 100644 --- a/tests/test_string_parser.py +++ b/tests/test_string_parser.py @@ -53,6 +53,7 @@ def test_detect(self): def test_parsing(self): for test_val, expected in zip(self.test_list, self.expected_parsed): result = sp.parse(test_val) + print(result) self.assertTrue(result == expected) From 58a0524e8ac47e6099914ea5394a030820b00556 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Tue, 23 Sep 2025 13:40:00 +0200 Subject: [PATCH 14/17] Cleaned code releated to ioreg and stringparsers --- src/sysdiagnose/parsers/ioacpiplane.py | 31 +++++ src/sysdiagnose/parsers/iodevicetree.py | 32 +++++ src/sysdiagnose/parsers/iofirewire.py | 32 +++++ src/sysdiagnose/parsers/iopower.py | 32 +++++ src/sysdiagnose/parsers/iousb.py | 32 +++++ .../utils/ioreg_parsers/string_parser.py | 119 ++++++------------ .../utils/ioreg_parsers/structure_parser.py | 10 +- tests/test_string_parser.py | 3 +- 8 files changed, 204 insertions(+), 87 deletions(-) create mode 100644 src/sysdiagnose/parsers/ioacpiplane.py create mode 100644 src/sysdiagnose/parsers/iodevicetree.py create mode 100644 src/sysdiagnose/parsers/iofirewire.py create mode 100644 src/sysdiagnose/parsers/iopower.py create mode 100644 src/sysdiagnose/parsers/iousb.py diff --git a/src/sysdiagnose/parsers/ioacpiplane.py b/src/sysdiagnose/parsers/ioacpiplane.py new file mode 100644 index 0000000..18f6d75 --- /dev/null +++ b/src/sysdiagnose/parsers/ioacpiplane.py @@ -0,0 +1,31 @@ +#! 
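Raising the interpreter-wide recursion limit works, but parse_main_loop only ever recurses on a strictly shorter string and does all of its work either before the recursive call or immediately after it, so the recursion can also be unrolled into a loop. A possible iterative shape, offered as a sketch rather than what the patch does, reusing Detect, parse_type, generate_tag and resolve_tag exactly as defined above:

    def parse_iterative(data_string: str):
        pending = []  # (tag, constructed) pairs in detection order
        hit = Detect(data_string)
        while hit.found:
            constructed = parse_type(hit.content, hit.type)
            tag = generate_tag()
            data_string = data_string.replace(hit.whole_match, tag, 1)
            pending.append((tag, constructed))
            hit = Detect(data_string)

        # fully tagged residue, matching the recursive stop case
        final_struct = data_string
        # resolve in reverse, mirroring the unwind of the recursion
        for tag, constructed in reversed(pending):
            final_struct = resolve_tag(final_struct, tag, constructed)
        return final_struct

That shape would retire both sys.setrecursionlimit and the depth counter, and no line would ever have to be skipped with a RecursionError warning.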
/usr/bin/env python3 + +import os +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser + + +class IOACPIPlaneParser(BaseParserInterface): + description = "IOACPIPlane.txt file parser" + format = "json" + + def __init__(self, config: SysdiagnoseConfig, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_file = "ioreg/IOACPIPlane.txt" + return [os.path.join(self.case_data_subfolder, log_file)] + + def execute(self) -> list | dict: + log_file = self.get_log_files()[0] + data_tree = {} + + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + p = IORegStructParser() + data_tree = p.parse(log_file) + + except Exception: + logger.exception("IOACPIPlane parsing crashed") + + return data_tree diff --git a/src/sysdiagnose/parsers/iodevicetree.py b/src/sysdiagnose/parsers/iodevicetree.py new file mode 100644 index 0000000..b803adb --- /dev/null +++ b/src/sysdiagnose/parsers/iodevicetree.py @@ -0,0 +1,32 @@ +#! /usr/bin/env python3 + +import os +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser + + +class IODeviceTreeParser(BaseParserInterface): + description = "IODeviceTree.txt file parser" + format = "json" + + def __init__(self, config: SysdiagnoseConfig, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_file = "ioreg/IODeviceTree.txt" + return [os.path.join(self.case_data_subfolder, log_file)] + + def execute(self) -> list | dict: + log_files = self.get_log_files() + data_tree = {} + + for log_file in log_files: + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + p = IORegStructParser() + data_tree = p.parse(log_file) + + except Exception: + logger.exception("IODeviceTree parsing crashed") + + return data_tree diff --git a/src/sysdiagnose/parsers/iofirewire.py b/src/sysdiagnose/parsers/iofirewire.py new file mode 100644 index 0000000..639f2e2 --- /dev/null +++ b/src/sysdiagnose/parsers/iofirewire.py @@ -0,0 +1,32 @@ +#! /usr/bin/env python3 + +import os +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser + + +class IOFireWireParser(BaseParserInterface): + description = "IOFireWire.txt file parser" + format = "json" + + def __init__(self, config: SysdiagnoseConfig, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_file = "ioreg/IOFireWire.txt" + return [os.path.join(self.case_data_subfolder, log_file)] + + def execute(self) -> list | dict: + log_files = self.get_log_files() + data_tree = {} + + for log_file in log_files: + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + p = IORegStructParser() + data_tree = p.parse(log_file) + + except Exception: + logger.exception("IOFireWire parsing crashed") + + return data_tree diff --git a/src/sysdiagnose/parsers/iopower.py b/src/sysdiagnose/parsers/iopower.py new file mode 100644 index 0000000..08a9087 --- /dev/null +++ b/src/sysdiagnose/parsers/iopower.py @@ -0,0 +1,32 @@ +#! 
/usr/bin/env python3 + +import os +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser + + +class IOPowerParser(BaseParserInterface): + description = "IOPower.txt file parser" + format = "json" + + def __init__(self, config: SysdiagnoseConfig, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_file = "ioreg/IOPower.txt" + return [os.path.join(self.case_data_subfolder, log_file)] + + def execute(self) -> list | dict: + log_files = self.get_log_files() + data_tree = {} + + for log_file in log_files: + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + p = IORegStructParser() + data_tree = p.parse(log_file) + + except Exception: + logger.exception("IOPower parsing crashed") + + return data_tree diff --git a/src/sysdiagnose/parsers/iousb.py b/src/sysdiagnose/parsers/iousb.py new file mode 100644 index 0000000..30688d2 --- /dev/null +++ b/src/sysdiagnose/parsers/iousb.py @@ -0,0 +1,32 @@ +#! /usr/bin/env python3 + +import os +from sysdiagnose.utils.base import BaseParserInterface, SysdiagnoseConfig, logger +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser + + +class IOServiceParser(BaseParserInterface): + description = "IOUSB.txt file parser" + format = "json" + + def __init__(self, config: SysdiagnoseConfig, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_file = "ioreg/IOUSB.txt" + return [os.path.join(self.case_data_subfolder, log_file)] + + def execute(self) -> list | dict: + log_files = self.get_log_files() + data_tree = {} + + for log_file in log_files: + try: + logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file}) + p = IORegStructParser() + data_tree = p.parse(log_file) + + except Exception: + logger.exception("IOUSB parsing crashed") + + return data_tree diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index cec9f98..fd97b22 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -29,7 +29,8 @@ def detect_type(self, input: str): CURLY_DICT : like xml_dict but with {} instead of <> - LIST : data in parentheses with at least one comma + LIST : data in parentheses ('[]', '()') or d-quotes with at least one comma + Note : most of basic d-quotes have been sinitized in prepare_data() STRING : parentheses that dont contain any comma. 
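The five parser modules added in this patch are identical apart from the plane name, the class name and the log file path. If more ioreg planes turn up, the boilerplate could be generated once; a sketch under that assumption (make_ioreg_parser is a hypothetical helper, and it is untested whether the framework's parser discovery accepts classes sharing one __file__, which may be exactly why the patch keeps separate modules):

    def make_ioreg_parser(plane: str):
        """Build a BaseParserInterface subclass for one ioreg plane file."""
        class _IORegPlaneParser(BaseParserInterface):
            description = f"{plane}.txt file parser"
            format = "json"

            def __init__(self, config: SysdiagnoseConfig, case_id: str):
                super().__init__(__file__, config, case_id)

            def get_log_files(self) -> list:
                return [os.path.join(self.case_data_subfolder, f"ioreg/{plane}.txt")]

            def execute(self) -> list | dict:
                data_tree = {}
                for log_file in self.get_log_files():
                    try:
                        logger.info(f"Processing file {log_file}", extra={'log_file': log_file})
                        data_tree = IORegStructParser().parse(log_file)
                    except Exception:
                        logger.exception(f"{plane} parsing crashed")
                return data_tree

        _IORegPlaneParser.__name__ = f"{plane}Parser"
        return _IORegPlaneParser

    IOUSBParser = make_ioreg_parser("IOUSB")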
example : I'm good at coding (not really) <-- shouldn't be a list, simply text @@ -62,7 +63,7 @@ def detect_type(self, input: str): self.assign_best(hit, DataType.STRING) # find simple double-quotes ex : "hello world" - hit = re.search(r'("[^"]*")', input) + hit = re.search(r'"([^"]*)"', input) if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.STRING) @@ -75,7 +76,7 @@ def assign_best(self, hit: re.Match, type: DataType): self._best_content = hit.group(1) self._found = True - def find_smallest(self, regex: str, data: str): + def find_smallest(self, regex: str, data: str) -> re.Match: pattern = re.compile(regex) matches = list(pattern.finditer(data)) if not matches: @@ -128,7 +129,7 @@ def check_anomaly(s: str, tag: str): logger.warning("Warning : Anomaly: some data was right next to " "the struct (without space), this data is thus lost\n---> " + structured) -def is_redundent_syntax_regex(s: str): +def is_redundent_syntax_regex(s: str) -> re.Match: """ If we have for example ([ ]) around a struct, we consider it useless Example : "[()]" is the same as """ return re.search(r'^[(){}\[\]<>""]+$', s) @@ -219,7 +220,7 @@ def parse_dict(input_string: str, separator: str) -> dict: return res -def parse_type(input_string: str, type: DataType): +def parse_type(input_string: str, type: DataType) -> dict | list | str: match type: case DataType.XML_DICT: return parse_dict(input_string, ' ') @@ -237,41 +238,7 @@ def parse_type(input_string: str, type: DataType): logger.error("Error : Type not found in parse_type(). (Note : " "you probably forgot to add it to the match case)") - -def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list): - for key in final_struct: - elem = final_struct[key] - #return resolve_tag_list_dict(final_struct, elem, key, tag, constructed) - - if isinstance(elem, str) and tag in elem: - if isinstance(constructed, str): - final_struct[key] = final_struct[key].replace(tag, constructed) - else: - check_anomaly(elem, tag) - final_struct[key] = constructed - return True - - elif isinstance(key, str) and tag in key: - if isinstance(constructed, str): - new_key = key.replace(tag, constructed) - value = final_struct[key] - del final_struct[key] - final_struct[new_key] = value - else: - logger.error("Error : Trying to use a struct as a key in a dict") - final_struct[key] = constructed - return True - - elif isinstance(elem, list): - if resolve_tag_list(elem, tag, constructed): - return True - - elif isinstance(elem, dict): - if resolve_tag_dict(elem, tag, constructed): - return True - return False - -def resolve_tag_list_dict(final_struct: list | dict, elem: list | dict | str, key: str, tag: str, constructed: dict | list | str): +def resolve_tag_list_dict(final_struct: list | dict, elem: list | dict | str, key: str, tag: str, constructed: dict | list | str) -> bool: if isinstance(elem, str) and tag in elem: if isinstance(constructed, str): final_struct[key] = final_struct[key].replace(tag, constructed) @@ -301,30 +268,38 @@ def resolve_tag_list_dict(final_struct: list | dict, elem: list | dict | str, ke return False +def resolve_tag_dict(final_struct: dict, tag: str, constructed: dict | list | str) -> bool: + for key in final_struct: + elem = final_struct[key] + if resolve_tag_list_dict(final_struct, elem, key, tag, constructed): + return True -def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list): + return False + +def resolve_tag_list(final_struct: list, tag: str, constructed: dict | list | str): for i in 
range(len(final_struct)): elem = final_struct[i] - #return resolve_tag_list_dict(final_struct, elem, i, tag, constructed) - - if isinstance(elem, str) and tag in elem: - if isinstance(constructed, str): - final_struct[i] = final_struct[i].replace(tag, constructed) - else: - check_anomaly(elem, tag) - final_struct[i] = constructed + if resolve_tag_list_dict(final_struct, elem, i, tag, constructed): return True - elif isinstance(elem, list): - if resolve_tag_list(elem, tag, constructed): - return True + return False + +def resolve_tag_str(final_struct: dict | list | str, tag: str, constructed: dict | list | str) -> dict | list | str: + if not isinstance(constructed, str): + if final_struct.replace(tag, "") == '()': + final_struct = constructed + else: + user_friendly = final_struct.replace(tag, "[STRUCT]") + lost_data = final_struct.replace(tag, "") + if not is_redundent_syntax_regex(lost_data) and lost_data: + logger.warning("Warning : trying to incorporate dict/list in a string :\n---> " + user_friendly) + final_struct = constructed + else: + final_struct = final_struct.replace(tag, constructed) - elif isinstance(elem, dict): - if resolve_tag_dict(elem, tag, constructed): - return True - return False + return final_struct -def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | list | str): +def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | list | str) -> dict | list | str: if isinstance(final_struct, dict): resolve_tag_dict(final_struct, tag, constructed) @@ -332,27 +307,17 @@ def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | l resolve_tag_list(final_struct, tag, constructed) elif isinstance(final_struct, str): - if not isinstance(constructed, str): - if final_struct.replace(tag, "") == '()': - final_struct = constructed - else: - user_friendly = final_struct.replace(tag, "[STRUCT]") - lost_data = final_struct.replace(tag, "") - if not is_redundent_syntax_regex(lost_data): - logger.warning("Warning : trying to incorporate dict/list in a string :\n---> " + user_friendly) - final_struct = constructed - else: - final_struct = final_struct.replace(tag, constructed) + final_struct = resolve_tag_str(final_struct, tag, constructed) else: logger.error('Error : struct type not found') exit(1) - # return is necessary bcs strings are not passed by reference in python + # return is necessary, strings are not passed by reference in python return final_struct -def parse_main_loop(data_string: str, depth: dict): +def parse_main_loop(data_string: str, depth: dict) -> dict | list | str: depth['value'] += 1 # Detection @@ -361,7 +326,7 @@ def parse_main_loop(data_string: str, depth: dict): # recursion stop if not hit.found: - return None + return data_string # form basic struct constructed = parse_type(hit.content, hit.type) @@ -374,14 +339,11 @@ def parse_main_loop(data_string: str, depth: dict): final_struct = parse_main_loop(data_string, depth) # reconstruct data structure - if not final_struct: - final_struct = constructed # at the root - else: - final_struct = resolve_tag(final_struct, tag, constructed) + final_struct = resolve_tag(final_struct, tag, constructed) return final_struct -def parse(data_string: str): +def parse(data_string: str) -> dict | list | str: # make it a struct so it is passed by reference depth = {'value': 0} @@ -392,7 +354,7 @@ def parse(data_string: str): data_string = prepare_line(data_string) try: - data_string = parse_main_loop(data_string, depth) + data_string = parse_main_loop(data_string, 
depth) or data_string except RecursionError: logger.warning("Skipped line with " + str(len(data_string)) + " characters. " "Recursion depth : " + str(depth['value']) + "\n" @@ -400,6 +362,3 @@ def parse(data_string: str): " in parse(). Feel free to try as high as needed to parse this line.") return data_string - - -print(parse('')) diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py index 016ce3f..b9acf48 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -67,7 +67,7 @@ def parse_values(self, data_dict: dict): if constructed: data_dict[key] = constructed - def dict_update(self, main_dict, data_dict): + def dict_update(self, main_dict: dict, data_dict: dict): """ Redefining the dict.update function to handle key collisions """ for key in data_dict: @@ -79,10 +79,10 @@ def dict_update(self, main_dict, data_dict): else: main_dict[key] = data_dict[key] - def parse_title(self): + def parse_title(self) -> tuple: if "+-o" not in self.line: logger.warning("'non-title' line given to title parser, should not happen") - return "" + return "", "" whole_title = self.line.split("+-o", 1)[1].strip() @@ -154,8 +154,8 @@ def iterate_children(self, depth: int, data_tree: dict): else: self.get_line() - def setup_new_child(self, data_tree, key): - """ This function is dedicated to iterate_child, it handles the special cases + def setup_new_child(self, data_tree: dict, key: str) -> dict: + """ This function is dedicated to iterate_children, it handles the special cases where a node name is already present for the same parent """ if data_tree.get(key): diff --git a/tests/test_string_parser.py b/tests/test_string_parser.py index 11f123b..6805a73 100644 --- a/tests/test_string_parser.py +++ b/tests/test_string_parser.py @@ -38,7 +38,7 @@ class TestStringParser(SysdiagnoseTestCase): ('k11 v11', sp.DataType.XML_DICT), (' li 1, li 2 , li3', sp.DataType.LIST), ('li1 , li2 ,li3, li4 ', sp.DataType.LIST), - ('li1 , li2 ,li3, li4 ', sp.DataType.LIST), + ('li11, li22', sp.DataType.LIST), ('k11 v11,k22 v22', sp.DataType.XML_DICT), ('()', sp.DataType.STRING), ('(li111)', sp.DataType.STRING) @@ -53,7 +53,6 @@ def test_detect(self): def test_parsing(self): for test_val, expected in zip(self.test_list, self.expected_parsed): result = sp.parse(test_val) - print(result) self.assertTrue(result == expected) From 4394482b601648ca8fcf4968d11b0f02570c5059 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Wed, 24 Sep 2025 10:14:37 +0200 Subject: [PATCH 15/17] bug fix concerning strings inside <> and cleanup --- .../utils/ioreg_parsers/string_parser.py | 9 +- .../utils/ioreg_parsers/structure_parser.py | 1 - tests/test_parsers_ioservice.py | 332 ++++++++---------- 3 files changed, 146 insertions(+), 196 deletions(-) diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py index fd97b22..4ec81e5 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/string_parser.py @@ -24,7 +24,7 @@ def __init__(self, input_string: str): def detect_type(self, input: str): """ Note on the match types - XML_DICT : data inside <> + XML_DICT : data inside <> with at least a comma or space between chars excluded : <> , < > , < > CURLY_DICT : like xml_dict but with {} instead of <> @@ -38,7 +38,7 @@ def detect_type(self, input: str): """ # noqa: W605 # find xml like dict ex : - hit = 
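The regex tightening in the hunk below is the advertised bug fix: a hex blob like <648a4c> contains neither a comma nor an inner space, so it must not be parsed as a one-key dictionary. A quick classification check against the two patterns introduced here (taken verbatim from the diff):

    import re

    XML_DICT = r'<([^<>]*([,]|[^\s<>][\s]+[^\s<>])[^<>]*)>'
    SIMPLE   = r'(<[^,<>\s]*>)'

    re.search(XML_DICT, '<key val, k2 v2>')  # match: comma and spaced tokens, dict-like
    re.search(XML_DICT, '<648a4c>')          # None: a single token is not a dict
    re.search(SIMPLE,   '<648a4c>')          # match: kept as a plain STRING payload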
self.find_smallest(r'<([^<>]*[^\s<>][^<>]*)>', input) + hit = self.find_smallest(r'<([^<>]*([,]|[^\s<>][\s]+[^\s<>])[^<>]*)>', input) if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.XML_DICT) @@ -52,6 +52,11 @@ def detect_type(self, input: str): if hit and len(hit.group(0)) < self._best_len: self.assign_best(hit, DataType.LIST) + # find simple string data in <> ex : <648a4c> + hit = re.search(r'(<[^,<>\s]*>)', input) + if hit and len(hit.group(0)) < self._best_len: + self.assign_best(hit, DataType.STRING) + # find simple parentheses without ',' ex : (hello world) hit = re.search(r'(\([^,)(]*\))', input) if hit and len(hit.group(0)) < self._best_len: diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py index b9acf48..6e4a292 100644 --- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py +++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py @@ -54,7 +54,6 @@ def fetch_node_data(self, data_tree: dict) -> bool: self.get_line() data_dict = self.node_data_to_json(node_data) - # TODO test this self.parse_values(data_dict) self.dict_update(data_tree, data_dict) diff --git a/tests/test_parsers_ioservice.py b/tests/test_parsers_ioservice.py index b402aca..c0d0f1e 100644 --- a/tests/test_parsers_ioservice.py +++ b/tests/test_parsers_ioservice.py @@ -1,4 +1,4 @@ -from sysdiagnose.parsers.ioservice import IOServiceParser +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser from tests import SysdiagnoseTestCase import unittest import io @@ -7,48 +7,47 @@ class TestParsersIOService(SysdiagnoseTestCase): def test_basic_structure(self): - for case_id, _ in self.sd.cases().items(): - p = IOServiceParser(self.sd.config, case_id=case_id) + p = IORegStructParser() # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') - start_file = io.BytesIO(b"""+-o Root node + start_file = io.StringIO("""+-o Root node | { | "data 1" = "value 1" | "data 2" = "value 2" | } | - +-o Node 2 + +-o Node 2 | { | "#address-cells" = <02000000> | "AAPL,phandle" = <01000000> | } | - +-o Node 3 + +-o Node 3 | | { | | "data 31" = "value 31" | | "data 32" = "value 32" | | } | | - | +-o Leaf 1 + | +-o Leaf 1 | | { | | "data l1" = "value l1" | | "data l2" = "value l2" | | } | | - | +-o Leaf 2 + | +-o Leaf 2 | { | "data l3" = "value l3" | "data l4" = "value l4" | } | - +-o Leaf 3 + +-o Leaf 3 | { | "data l5" = "value L5" | "data l6" = "value l6" | } | - +-o Leaf 4 + +-o Leaf 4 { "data 51" = "value 51" "data 52" = "value 52" @@ -57,63 +56,46 @@ def test_basic_structure(self): """) # noqa: W291, W293 expected = { - "Children": [ - { - "Children": [ - { - "Children": [ - { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" - }, - { - "Children": [], - "Data": { - "data l3": "\"value l3\"", - "data l4": "\"value l4\"" - }, - "Name": "Leaf 2" - } - ], - "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" - }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" - }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" - }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', 
+ 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' }, - "Name": "Node 2" + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value l4' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' } - ], - "Data": { - "data 1": "\"value 1\"", - "data 2": "\"value 2\"" - }, - "Name": "Root node" + } } p.open_file = start_file @@ -123,36 +105,35 @@ def test_basic_structure(self): self.assertTrue(result == expected) def test_value_overflow_anomaly(self): - for case_id, _ in self.sd.cases().items(): - p = IOServiceParser(self.sd.config, case_id=case_id) + p = IORegStructParser() # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') - start_file = io.BytesIO(b"""+-o Root node + start_file = io.StringIO("""+-o Root node | { | "data 1" = "value 1" | "data 2" = "value 2" | } | - +-o Node 2 + +-o Node 2 | { | "#address-cells" = <02000000> | "AAPL,phandle" = <01000000> | } | - +-o Node 3 + +-o Node 3 | | { | | "data 31" = "value 31" | | "data 32" = "value 32" | | } | | - | +-o Leaf 1 + | +-o Leaf 1 | | { | | "data l1" = "value l1" | | "data l2" = "value l2" | | } | | - | +-o Leaf 2 + | +-o Leaf 2 | { | "data l3" = "value l3" | "data l4" = "value aaaa @@ -162,13 +143,13 @@ def test_value_overflow_anomaly(self): " | } | - +-o Leaf 3 + +-o Leaf 3 | { | "data l5" = "value L5" | "data l6" = "value l6" | } | - +-o Leaf 4 + +-o Leaf 4 { "data 51" = "value 51" "data 52" = "value 52" @@ -177,63 +158,46 @@ def test_value_overflow_anomaly(self): """) # noqa: W291, W293 expected = { - "Children": [ - { - "Children": [ - { - "Children": [ - { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" - }, - { - "Children": [], - "Data": { - "data l3": "\"value l3\"", - "data l4": "\"value aaaabbbbccccdddd\"" - }, - "Name": "Leaf 2" - } - ], - "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" - }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" - }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" - }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' }, - "Name": "Node 2" + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' } - ], - "Data": { - "data 1": "\"value 1\"", - "data 
2": "\"value 2\"" - }, - "Name": "Root node" + } } p.open_file = start_file @@ -243,48 +207,47 @@ def test_value_overflow_anomaly(self): self.assertTrue(result == expected) def test_non_ascii_byte_anomaly(self): - for case_id, _ in self.sd.cases().items(): - p = IOServiceParser(self.sd.config, case_id=case_id) + p = IORegStructParser() # careful, spaces and structure is important # This simulates an open file object, as if we opened it with open(path, 'rb') - start_file = io.BytesIO(b"""+-o Root node + start_file = io.StringIO("""+-o Root node | { | "data 1" = "value 1" | "data 2" = "value 2" | } | - +-o Node 2 + +-o Node 2 | { | "#address-cells" = <02000000> | "AAPL,phandle" = <01000000> | } | - +-o Node 3 + +-o Node 3 | | { | | "data 31" = "value 31" | | "data 32" = "value 32" | | } | | - | +-o Leaf 1 + | +-o Leaf 1 | | { | | "data l1" = "value l1" | | "data l2" = "value l2" | | } | | - | +-o Leaf 2 + | +-o Leaf 2 | { | "data l3" = "value l3" | "data l4" = "value -->\xbf<--" | } | - +-o Leaf 3 + +-o Leaf 3 | { | "data l5" = "value L5" | "data l6" = "value l6" | } | - +-o Leaf 4 + +-o Leaf 4 { "data 51" = "value 51" "data 52" = "value 52" @@ -293,63 +256,46 @@ def test_non_ascii_byte_anomaly(self): """) # noqa: W291, W293 expected = { - "Children": [ - { - "Children": [ - { - "Children": [ - { - "Children": [], - "Data": { - "data l1": "\"value l1\"", - "data l2": "\"value l2\"" - }, - "Name": "Leaf 1" - }, - { - "Children": [], - "Data": { - "data l3": "\"value l3\"", - "data l4": "\"value -->?<--\"" - }, - "Name": "Leaf 2" - } - ], - "Data": { - "data 31": "\"value 31\"", - "data 32": "\"value 32\"" - }, - "Name": "Node 3" - }, - { - "Children": [], - "Data": { - "data l5": "\"value L5\"", - "data l6": "\"value l6\"" - }, - "Name": "Leaf 3" - }, - { - "Children": [], - "Data": { - "data 51": "\"value 51\"", - "data 52": "\"value 52\"" - }, - "Name": "Leaf 4" - } - ], - "Data": { - "#address-cells": "<02000000>", - "AAPL,phandle": "<01000000>" + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' }, - "Name": "Node 2" + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' } - ], - "Data": { - "data 1": "\"value 1\"", - "data 2": "\"value 2\"" - }, - "Name": "Root node" + } } p.open_file = start_file From e5a9ebaea876ea63d4444668f04acf398f247421 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Thu, 25 Sep 2025 13:41:47 +0200 Subject: [PATCH 16/17] dissociated ioreg parsers + small fixes --- src/sysdiagnose/parsers/iousb.py | 2 +- tests/test_parsers_ioacpiplane.py | 320 +++++++++++++++++++++++++++++ tests/test_parsers_iodevicetree.py | 320 +++++++++++++++++++++++++++++ tests/test_parsers_iofirewire.py | 320 +++++++++++++++++++++++++++++ tests/test_parsers_iopower.py | 320 +++++++++++++++++++++++++++++ tests/test_parsers_ioservice.py | 11 + tests/test_parsers_iousb.py | 320 +++++++++++++++++++++++++++++ 7 files changed, 1612 insertions(+), 1 deletion(-) create mode 100644 
tests/test_parsers_ioacpiplane.py create mode 100644 tests/test_parsers_iodevicetree.py create mode 100644 tests/test_parsers_iofirewire.py create mode 100644 tests/test_parsers_iopower.py create mode 100644 tests/test_parsers_iousb.py diff --git a/src/sysdiagnose/parsers/iousb.py b/src/sysdiagnose/parsers/iousb.py index 30688d2..1b1739a 100644 --- a/src/sysdiagnose/parsers/iousb.py +++ b/src/sysdiagnose/parsers/iousb.py @@ -5,7 +5,7 @@ from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser -class IOServiceParser(BaseParserInterface): +class IOUSBParser(BaseParserInterface): description = "IOUSB.txt file parser" format = "json" diff --git a/tests/test_parsers_ioacpiplane.py b/tests/test_parsers_ioacpiplane.py new file mode 100644 index 0000000..e2ebb92 --- /dev/null +++ b/tests/test_parsers_ioacpiplane.py @@ -0,0 +1,320 @@ +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser +from sysdiagnose.parsers.ioacpiplane import IOACPIPlaneParser +from tests import SysdiagnoseTestCase +import unittest +import io +import os + + +class TestParsersIOACPIPlane(SysdiagnoseTestCase): + + def test_parse_case(self): + for case_id, case in self.sd.cases().items(): + p = IOACPIPlaneParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + def test_basic_structure(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value l4" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value l4' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_value_overflow_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o 
Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_parsers_iodevicetree.py b/tests/test_parsers_iodevicetree.py new file mode 100644 index 0000000..ee1e560 --- /dev/null +++ b/tests/test_parsers_iodevicetree.py @@ -0,0 +1,320 @@ +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser +from sysdiagnose.parsers.iodevicetree import IODeviceTreeParser +from tests import SysdiagnoseTestCase 
+import unittest +import io +import os + + +class TestParsersIODeviceTree(SysdiagnoseTestCase): + + def test_parse_case(self): + for case_id, case in self.sd.cases().items(): + p = IODeviceTreeParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + def test_basic_structure(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value l4" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value l4' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_value_overflow_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 
'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_parsers_iofirewire.py b/tests/test_parsers_iofirewire.py new file mode 100644 index 0000000..bb49df8 --- /dev/null +++ b/tests/test_parsers_iofirewire.py @@ -0,0 +1,320 @@ +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser +from sysdiagnose.parsers.iofirewire import IOFireWireParser +from tests import SysdiagnoseTestCase +import unittest +import io +import os + + +class TestParsersIOFireWire(SysdiagnoseTestCase): + + def test_parse_case(self): + for case_id, case in self.sd.cases().items(): + p = IOFireWireParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + def test_basic_structure(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value l4" + | } + | + +-o Leaf 3 + | { 
+ | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value l4' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_value_overflow_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" 
= "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_parsers_iopower.py b/tests/test_parsers_iopower.py new file mode 100644 index 0000000..7bd0406 --- /dev/null +++ b/tests/test_parsers_iopower.py @@ -0,0 +1,320 @@ +from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser +from sysdiagnose.parsers.iopower import IOPowerParser +from tests import SysdiagnoseTestCase +import unittest +import io +import os + + +class TestParsersIOPower(SysdiagnoseTestCase): + + def test_parse_case(self): + for case_id, case in self.sd.cases().items(): + p = IOPowerParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + def test_basic_structure(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value l4" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value l4' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_value_overflow_anomaly(self): + 
p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/tests/test_parsers_ioservice.py b/tests/test_parsers_ioservice.py
index c0d0f1e..ab54678 100644
--- a/tests/test_parsers_ioservice.py
+++ b/tests/test_parsers_ioservice.py
@@ -1,11 +1,22 @@
 from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser
+from sysdiagnose.parsers.ioservice import IOServiceParser
 from tests import SysdiagnoseTestCase
 import unittest
 import io
+import os
 
 
 class TestParsersIOService(SysdiagnoseTestCase):
 
+    def test_parse_case(self):
+        for case_id, case in self.sd.cases().items():
+            p = IOServiceParser(self.sd.config, case_id=case_id)
+            files = p.get_log_files()
+            self.assertTrue(len(files) > 0)
+
+            p.save_result(force=True)
+            self.assertTrue(os.path.isfile(p.output_file))
+
     def test_basic_structure(self):
         p = IORegStructParser()
 
diff --git a/tests/test_parsers_iousb.py b/tests/test_parsers_iousb.py
new file mode 100644
index 0000000..a58e625
--- /dev/null
+++ b/tests/test_parsers_iousb.py
@@ -0,0 +1,320 @@
+from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser
+from sysdiagnose.parsers.iousb import IOUSBParser
+from tests import SysdiagnoseTestCase
+import unittest
+import io
+import os
+
+
+class TestParsersIOUSB(SysdiagnoseTestCase):
+
+    def test_parse_case(self):
+        for case_id, case in self.sd.cases().items():
+            p = IOUSBParser(self.sd.config, case_id=case_id)
+            files = p.get_log_files()
+            self.assertTrue(len(files) > 0)
+
+            p.save_result(force=True)
+            self.assertTrue(os.path.isfile(p.output_file))
+
+    def test_basic_structure(self):
+        p = IORegStructParser()
+
+        # careful, spaces and structure is important
+        # This simulates an open file object, as if we opened it with open(path, 'r')
+        start_file = io.StringIO("""+-o Root node
+  | {
+  |   "data 1" = "value 1"
+  |   "data 2" = "value 2"
+  | }
+  | 
+  +-o Node 2
+    | {
+    |   "#address-cells" = <02000000>
+    |   "AAPL,phandle" = <01000000>
+    | }
+    | 
+    +-o Node 3
+    | | {
+    | |   "data 31" = "value 31"
+    | |   "data 32" = "value 32"
+    | | }
+    | | 
+    | +-o Leaf 1
+    | | {
+    | |   "data l1" = "value l1"
+    | |   "data l2" = "value l2"
+    | | }
+    | | 
+    | +-o Leaf 2
+    |   {
+    |     "data l3" = "value l3"
+    |     "data l4" = "value l4"
+    |   }
+    | 
+    +-o Leaf 3
+    | {
+    |   "data l5" = "value L5"
+    |   "data l6" = "value l6"
+    | }
+    | 
+    +-o Leaf 4
+      {
+        "data 51" = "value 51"
+        "data 52" = "value 52"
+      }
+
+""") # noqa: W291, W293
+
+        expected = {
+            'class': 'test1',
+            'key1': 'val1',
+            'data 1': 'value 1',
+            'data 2': 'value 2',
+            'Node 2': {
+                'class': 'test2',
+                'key2': 'val2',
+                '#address-cells': '<02000000>',
+                'AAPL,phandle': '<01000000>',
+                'Node 3': {
+                    'class': 'test3',
+                    'key3': 'val3',
+                    'data 31': 'value 31',
+                    'data 32': 'value 32',
+                    'Leaf 1': {
+                        'class': 'test11',
+                        'key11': 'val11',
+                        'data l1': 'value l1',
+                        'data l2': 'value l2'
+                    },
+                    'Leaf 2': {
+                        'class': 'test22',
+                        'key22': 'val22',
+                        'data l3': 'value l3',
+                        'data l4': 'value l4'
+                    }
+                },
+                'Leaf 3': {
+                    'class': 'test33',
+                    'key33': 'val33',
+                    'data l5': 'value L5',
+                    'data l6': 'value l6'
+                },
+                'Leaf 4': {
+                    'class': 'test44',
+                    'key44': 'val44',
+                    'data 51': 'value 51',
+                    'data 52': 'value 52'
+                }
+            }
+        }
+
+        p.open_file = start_file
+        result = {}
+        p.recursive_fun(result)
+
+        self.assertTrue(result == expected)
+
+    def test_value_overflow_anomaly(self):
+        p = IORegStructParser()
+
+        # careful, spaces and structure is important
+        # This simulates an open file object, as if we opened it with open(path, 'r')
+        start_file = io.StringIO("""+-o Root node
+  | {
+  |   "data 1" = "value 1"
+  |   "data 2" = "value 2"
+  | }
+  | 
+  +-o Node 2
+    |
{ + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value aaaa +bbbb +cccc +dddd +" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value aaaabbbbccccdddd' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + def test_non_ascii_byte_anomaly(self): + p = IORegStructParser() + + # careful, spaces and structure is important + # This simulates an open file object, as if we opened it with open(path, 'rb') + start_file = io.StringIO("""+-o Root node + | { + | "data 1" = "value 1" + | "data 2" = "value 2" + | } + | + +-o Node 2 + | { + | "#address-cells" = <02000000> + | "AAPL,phandle" = <01000000> + | } + | + +-o Node 3 + | | { + | | "data 31" = "value 31" + | | "data 32" = "value 32" + | | } + | | + | +-o Leaf 1 + | | { + | | "data l1" = "value l1" + | | "data l2" = "value l2" + | | } + | | + | +-o Leaf 2 + | { + | "data l3" = "value l3" + | "data l4" = "value -->\xbf<--" + | } + | + +-o Leaf 3 + | { + | "data l5" = "value L5" + | "data l6" = "value l6" + | } + | + +-o Leaf 4 + { + "data 51" = "value 51" + "data 52" = "value 52" + } + +""") # noqa: W291, W293 + + expected = { + 'class': 'test1', + 'key1': 'val1', + 'data 1': 'value 1', + 'data 2': 'value 2', + 'Node 2': { + 'class': 'test2', + 'key2': 'val2', + '#address-cells': '<02000000>', + 'AAPL,phandle': '<01000000>', + 'Node 3': { + 'class': 'test3', + 'key3': 'val3', + 'data 31': 'value 31', + 'data 32': 'value 32', + 'Leaf 1': { + 'class': 'test11', + 'key11': 'val11', + 'data l1': 'value l1', + 'data l2': 'value l2' + }, + 'Leaf 2': { + 'class': 'test22', + 'key22': 'val22', + 'data l3': 'value l3', + 'data l4': 'value -->\xbf<--' + } + }, + 'Leaf 3': { + 'class': 'test33', + 'key33': 'val33', + 'data l5': 'value L5', + 'data l6': 'value l6' + }, + 'Leaf 4': { + 'class': 'test44', + 'key44': 'val44', + 'data 51': 'value 51', + 'data 52': 'value 52' + } + } + } + + p.open_file = start_file + result = {} + p.recursive_fun(result) + + self.assertTrue(result == expected) + + +if __name__ == '__main__': + unittest.main() From eea716a99ff76430751303044d423889e42c04e3 Mon Sep 17 00:00:00 2001 From: Aweinhof Date: Thu, 25 Sep 2025 17:56:44 +0200 Subject: [PATCH 17/17] Replaced exit calls, renamed class attributes and moved string_parser.py to utils --- .../utils/ioreg_parsers/structure_parser.py | 46 ++++++++----------- 
.../{ioreg_parsers => }/string_parser.py      |  2 +-
 tests/test_string_parser.py                   |  2 +-
 3 files changed, 21 insertions(+), 29 deletions(-)
 rename src/sysdiagnose/utils/{ioreg_parsers => }/string_parser.py (99%)

diff --git a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py
index 6e4a292..a174de3 100644
--- a/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py
+++ b/src/sysdiagnose/utils/ioreg_parsers/structure_parser.py
@@ -1,10 +1,10 @@
 from sysdiagnose.utils.base import logger
-from sysdiagnose.utils.ioreg_parsers import string_parser
+from sysdiagnose.utils import string_parser
 import re
 
 
 class IORegStructParser:
-    rollback_addr = None
-    line = None
+    __rollback_addr = None
+    __curr_line = None
 
     def __init__(self):
         pass
@@ -19,23 +19,18 @@ def parse(self, file_path):
         return data_tree
 
     def get_line(self):
-        self.rollback_addr = self.open_file.tell()
-        self.line = self.open_file.readline()
-        self.line = self.line.replace('\n', '')
+        self.__rollback_addr = self.open_file.tell()
+        self.__curr_line = self.open_file.readline()
+        self.__curr_line = self.__curr_line.replace('\n', '')
 
     def recursive_call(self, data_tree: dict):
-        self.open_file.seek(self.rollback_addr)
+        self.open_file.seek(self.__rollback_addr)
         self.recursive_fun(data_tree)
 
     def check_start_node(self):
-        if '+-o' not in self.line:
+        if '+-o' not in self.__curr_line:
             logger.error('This is not normal. Recursive function called on random line.')
-            exit(1)
-
-    def not_empty_node_check(self):
-        if not self.rollback_addr:
-            logger.error("+-o in two consecutive lines, not supposed to be possible")
-            exit(1)
+            raise Exception("File has an invalid structure, '+-o' tag was not found in first line")
 
     def check_key_uniqueness(self, dictio: dict, key: str):
         if dictio.get(key):
@@ -45,12 +40,12 @@ def fetch_node_data(self, data_tree: dict) -> bool:
         node_data = []  # array of lines, to be transformed in json
         res = True
 
-        while '+-o' not in self.line:
-            if not self.line:  # end of file
+        while '+-o' not in self.__curr_line:
+            if not self.__curr_line:  # end of file
                 res = False
                 break
 
-            node_data.append(self.line)
+            node_data.append(self.__curr_line)
             self.get_line()
 
         data_dict = self.node_data_to_json(node_data)
@@ -79,11 +74,11 @@ def dict_update(self, main_dict: dict, data_dict: dict):
             main_dict[key] = data_dict[key]
 
     def parse_title(self) -> tuple:
-        if "+-o" not in self.line:
+        if "+-o" not in self.__curr_line:
             logger.warning("'non-title' line given to title parser, should not happen")
             return "", ""
 
-        whole_title = self.line.split("+-o", 1)[1].strip()
+        whole_title = self.__curr_line.split("+-o", 1)[1].strip()
         if " format, to investigate")
@@ -144,8 +139,8 @@ def node_data_to_json(self, data_array: list[str]) -> dict:
         return res
 
     def iterate_children(self, depth: int, data_tree: dict):
-        while self.line and (self.line[depth] == '|' or self.line[depth: depth + 3] == '+-o'):
-            if self.line[depth: depth + 3] == '+-o':
+        while self.__curr_line and (self.__curr_line[depth] == '|' or self.__curr_line[depth: depth + 3] == '+-o'):
+            if self.__curr_line[depth: depth + 3] == '+-o':
                 name = self.parse_title()[0]
                 new_child = self.setup_new_child(data_tree, name)
                 self.recursive_call(new_child)
@@ -185,11 +180,11 @@ def recursive_fun(self, data_tree: dict):
 
         self.dict_update(data_tree, additional_data)
 
-        depth = self.line.index('o')  # to identify the other nodes that have the same parent
+        depth = self.__curr_line.index('o')  # to identify the other nodes that have the same parent
         self.get_line()
 
         # check if
it's a leaf
-        if self.line[depth] != '|':
+        if self.__curr_line[depth] != '|':
             is_leaf = True
 
         # Fetch the data of the node
@@ -198,11 +193,8 @@ def recursive_fun(self, data_tree: dict):
 
         # stop if we're a leaf
         if is_leaf:
-            self.open_file.seek(self.rollback_addr)
+            self.open_file.seek(self.__rollback_addr)
             return
 
-        # sanity check
-        self.not_empty_node_check()
-
         # Iterates over each child to call the current function
         self.iterate_children(depth, data_tree)
 
diff --git a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py b/src/sysdiagnose/utils/string_parser.py
similarity index 99%
rename from src/sysdiagnose/utils/ioreg_parsers/string_parser.py
rename to src/sysdiagnose/utils/string_parser.py
index 4ec81e5..1c484bd 100644
--- a/src/sysdiagnose/utils/ioreg_parsers/string_parser.py
+++ b/src/sysdiagnose/utils/string_parser.py
@@ -316,7 +316,7 @@ def resolve_tag(final_struct: dict | list | str, tag: str, constructed: dict | l
 
     else:
         logger.error('Error : struct type not found')
-        exit(1)
+        raise ValueError("Structure passed has to be a dict, a list or a string. Type: " + str(type(final_struct)))
 
     # return is necessary, strings are not passed by reference in python
     return final_struct
diff --git a/tests/test_string_parser.py b/tests/test_string_parser.py
index 6805a73..3f5da07 100644
--- a/tests/test_string_parser.py
+++ b/tests/test_string_parser.py
@@ -1,6 +1,6 @@
 from tests import SysdiagnoseTestCase
 import unittest
-import sysdiagnose.utils.ioreg_parsers.string_parser as sp
+import sysdiagnose.utils.string_parser as sp
 
 
 class TestStringParser(SysdiagnoseTestCase):
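
Note on the error-handling change in the last patch: since the exit(1) calls were replaced by raised exceptions, callers of the ioreg parsers can now decide how to recover from malformed input instead of having the whole run aborted. A minimal usage sketch of the calling side, under that assumption: the parse_ioreg_file() wrapper below is hypothetical and not part of the patches, while IORegStructParser.parse() and the shared logger come from the code above.

    from sysdiagnose.utils.base import logger
    from sysdiagnose.utils.ioreg_parsers.structure_parser import IORegStructParser


    def parse_ioreg_file(path: str) -> dict:
        # Hypothetical helper, shown for illustration only.
        parser = IORegStructParser()
        try:
            # parse() walks the '+-o' node tree and returns it as a nested dict.
            return parser.parse(path)
        except Exception:
            # check_start_node() raises a plain Exception on structurally invalid
            # input, and string_parser.resolve_tag() raises ValueError; log and
            # return an empty tree so the remaining files can still be processed.
            logger.exception(f"Could not parse ioreg file {path}")
            return {}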