More robust docstring parsing (#1323)

mmatera · web-flow · commit 275a3b8da9e6 · 2025-01-29T12:37:24.000-05:00
In the current (master) documentation/doctest implementation, all docstrings are assumed to have a "margin" of spaces inherited from the class indentation. As a result, the regular expression that looks for doctests
fails to find them unless the line starts with at least one space.

Python 3.13 removes the "left margin" of docstrings, so the regular expression will fail to detect some tests.
This PR changes the regular expression to allow doctests which do not start with a space.

Also, in future versions of Python, tab characters in docstrings will be converted into spaces, which will cause some tests to fail. In this PR, tabs in doctests are replaced by sequences of 4 spaces, making the tests more robust to this kind of change.
diff --git a/mathics/builtin/files_io/importexport.py b/mathics/builtin/files_io/importexport.py
@@ -1060,16 +1060,16 @@ class RegisterImport(Builtin):
     >> FilePrint["ExampleData/ExampleData.txt"]
      | Example File Format
      | Created by Angus
-     | 0.629452	0.586355
-     | 0.711009	0.687453
-     | 0.246540	0.433973
-     | 0.926871	0.887255
-     | 0.825141	0.940900
-     | 0.847035	0.127464
-     | 0.054348	0.296494
-     | 0.838545	0.247025
-     | 0.838697	0.436220
-     | 0.309496	0.833591
+     | 0.629452    0.586355
+     | 0.711009    0.687453
+     | 0.246540    0.433973
+     | 0.926871    0.887255
+     | 0.825141    0.940900
+     | 0.847035    0.127464
+     | 0.054348    0.296494
+     | 0.838545    0.247025
+     | 0.838697    0.436220
+     | 0.309496    0.833591
 
     >> Import["ExampleData/ExampleData.txt", {"ExampleFormat1", "Elements"}]
      = {Data, Header}
diff --git a/mathics/doc/doc_entries.py b/mathics/doc/doc_entries.py
@@ -93,7 +93,7 @@
     r"""(?mx)^  # re.MULTILINE (multi-line match)
                 # and re.VERBOSE (readable regular expressions
         ((?:.|\n)*?)
-        ^\s+([>#SX])>[ ](.*)  # test-code indicator
+        ^\s*([>#SX])>[ ](.*)  # test-code indicator
         ((?:\n\s*(?:[:|=.][ ]|\.).*)*)  # test-code results"""
 )
 TESTCASE_OUT_RE = re.compile(r"^\s*([:|=])(.*)$")
@@ -216,10 +216,7 @@ def parse_docstring_to_DocumentationEntry_items(
         logging.warning("``key_part`` is deprecated. Its value is discarded.")
 
     # Remove commented lines.
-    doc = filter_comments(doc).strip(r"\s")
-
-    # Remove leading <dl>...</dl>
-    # doc = DL_RE.sub("", doc)
+    doc = filter_comments(doc)
 
     # pre-substitute Python code because it might contain tests
     doc, post_substitutions = pre_sub(
@@ -394,11 +391,16 @@ def compare_out(self, outs: tuple = tuple()) -> bool:
             # Mismatched number of output lines, and we don't have "..."
             return False
 
+        # Python 3.13 replaces tabs by a single space in docstrings.
+        # In doctests we replace tabs by sequences of four spaces.
+        def tabs_to_spaces(val):
+            return val.text.replace("\t", 4 * " ")
+
         # Need to check all output line by line
         for got, wanted in zip(outs, wanted_outs):
             if wanted.text == "...":
                 return True
-            if not got == wanted:
+            if not tabs_to_spaces(got) == tabs_to_spaces(wanted):
                 return False
 
         return True