✨ NEW: Add plugin & tests to render subscripts (#122)

miteshashar · web-flow · commit 91264a63db28 · 2025-08-11T09:14:38.000+02:00
diff --git a/docs/index.md b/docs/index.md
@@ -113,6 +113,12 @@ html_string = md.render("some *Markdown*")
 .. autofunction:: mdit_py_plugins.amsmath.amsmath_plugin
 ```
 
+## Subscripts
+
+```{eval-rst}
+.. autofunction:: mdit_py_plugins.subscript.sub_plugin
+```
+
 ## MyST plugins
 
 `myst_blocks` and `myst_role` plugins are also available, for utilisation by the [MyST renderer](https://myst-parser.readthedocs.io/en/latest/using/syntax.html)
diff --git a/mdit_py_plugins/subscript/__init__.py b/mdit_py_plugins/subscript/__init__.py
@@ -0,0 +1,117 @@
+"""
+Markdown-it-py plugin to introduce <sub> markup using ~subscript~.
+
+Ported from
+https://github.com/markdown-it/markdown-it-sub/blob/master/index.mjs
+
+Originally ported during implementation of https://github.com/hasgeek/funnel/blob/main/funnel/utils/markdown/mdit_plugins/sub_tag.py
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+import re
+
+from markdown_it import MarkdownIt
+from markdown_it.renderer import RendererHTML
+from markdown_it.rules_inline import StateInline
+from markdown_it.token import Token
+from markdown_it.utils import EnvType, OptionsDict
+
+__all__ = ["sub_plugin"]
+
+# Delimiter character that both opens and closes a subscript span.
+TILDE_CHAR = "~"
+
+# Finds a whitespace character that is NOT backslash-escaped: it must be
+# preceded by the start of the string or a non-backslash character, followed by
+# zero or more *pairs* of backslashes (each pair being an escaped backslash).
+WHITESPACE_RE = re.compile(r"(^|[^\\])(\\\\)*\s")
+# Matches a backslash followed by a space or one of the listed ASCII
+# punctuation characters; the character is captured so that the escaping
+# backslash can be dropped with ``UNESCAPE_RE.sub(r"\1", ...)``.
+UNESCAPE_RE = re.compile(r'\\([ \\!"#$%&\'()*+,.\/:;<=>?@[\]^_`{|}~-])')
+
+
+def tokenize(state: StateInline, silent: bool) -> bool:
+    """Parse a ~subscript~ token.
+
+    Inline rule that tries to match ``~content~`` starting at ``state.pos``.
+
+    The rule declines (returns ``False`` and leaves ``state.pos`` at its
+    starting value) when:
+
+    - it is called in validation mode (``silent`` is true);
+    - the character at ``state.pos`` is not a tilde;
+    - fewer than three characters are available before ``state.posMax``;
+    - no closing tilde is found before ``state.posMax``;
+    - the content between the tildes is empty;
+    - the content contains whitespace that is not backslash-escaped.
+
+    On success it pushes ``sub_open``, ``text`` and ``sub_close`` tokens,
+    moves ``state.pos`` just past the closing tilde, restores
+    ``state.posMax`` and returns ``True``. The content is emitted as a single
+    ``text`` token (with backslash escapes removed); it is not tokenized any
+    further by this rule.
+
+    :param state: inline parser state; ``pos``/``posMax`` are read and updated.
+    :param silent: validation-mode flag; when true nothing is matched.
+    :return: ``True`` if a subscript span was consumed, otherwise ``False``.
+    """
+    start = state.pos
+    ch = state.src[start]
+    maximum = state.posMax
+    found = False
+
+    # Don't run any pairs in validation mode
+    if silent:
+        return False
+
+    if ch != TILDE_CHAR:
+        return False
+
+    # The shortest possible match is three characters long ("~x~").
+    if start + 2 >= maximum:
+        return False
+
+    state.pos = start + 1
+
+    # Scan forward for the closing tilde. Everything else is stepped over with
+    # the inline parser's skipToken, which is expected to advance state.pos
+    # past one whole inline construct (so that e.g. an escaped "\~" is not
+    # taken as the closer) -- NOTE(review): confirm against markdown-it-py.
+    while state.pos < maximum:
+        if state.src[state.pos] == TILDE_CHAR:
+            found = True
+            break
+        state.md.inline.skipToken(state)
+
+    # No closer, or the closer immediately follows the opener (empty content):
+    # rewind and let other rules handle the tilde.
+    if not found or start + 1 == state.pos:
+        state.pos = start
+        return False
+
+    # Raw text between the two tildes (both delimiters excluded).
+    content = state.src[start + 1 : state.pos]
+
+    # Don't allow unescaped spaces/newlines inside
+    if WHITESPACE_RE.search(content) is not None:
+        state.pos = start
+        return False
+
+    # Found a valid pair, so update posMax and pos
+    # (posMax temporarily holds the index of the closing tilde; it is read
+    # again below to compute the final position and then restored).
+    state.posMax = state.pos
+    state.pos = start + 1
+
+    # ``silent`` is always False here (validation mode returned early above),
+    # so the tokens can be pushed unconditionally.
+    token = state.push("sub_open", "sub", 1)
+    token.markup = TILDE_CHAR
+
+    token = state.push("text", "", 0)
+    # Drop the backslash from escaped characters, e.g. "\ " becomes " ".
+    token.content = UNESCAPE_RE.sub(r"\1", content)
+
+    token = state.push("sub_close", "sub", -1)
+    token.markup = TILDE_CHAR
+
+    # Continue parsing right after the closing tilde, with the original limit.
+    state.pos = state.posMax + 1
+    state.posMax = maximum
+    return True
+
+
+def sub_open(
+    renderer: RendererHTML,
+    tokens: Sequence[Token],
+    idx: int,
+    options: OptionsDict,
+    env: EnvType,
+) -> str:
+    """Render the opening tag for a ~subscript~ token.
+
+    None of the arguments are used; they are only present to match the
+    signature of the other render rule in this module.
+
+    :return: the literal string ``"<sub>"``.
+    """
+    return "<sub>"
+
+
+def sub_close(
+    renderer: RendererHTML,
+    tokens: Sequence[Token],
+    idx: int,
+    options: OptionsDict,
+    env: EnvType,
+) -> str:
+    """Render the closing tag for a ~subscript~ token.
+
+    None of the arguments are used; they are only present to match the
+    signature of the other render rule in this module.
+
+    :return: the literal string ``"</sub>"``.
+    """
+    return "</sub>"
+
+
+def sub_plugin(md: MarkdownIt) -> None:
+    """
+    Markdown-it-py plugin to introduce <sub> markup using ~subscript~.
+
+    Ported from
+    https://github.com/markdown-it/markdown-it-sub/blob/master/index.mjs
+
+    Originally ported during implementation of https://github.com/hasgeek/funnel/blob/main/funnel/utils/markdown/mdit_plugins/sub_tag.py
+
+    Registers an inline rule named ``sub`` after the ``emphasis`` rule, plus
+    render rules for the ``sub_open`` and ``sub_close`` tokens.
+
+    Example: with ``MarkdownIt("commonmark").use(sub_plugin)``, the input
+    ``H~2~O`` renders as ``<p>H<sub>2</sub>O</p>``. Whitespace inside the
+    tildes must be backslash-escaped (``~foo\\ bar~``), otherwise the tildes
+    are left as plain text.
+    """
+    # Inline rule: recognise "~...~" and emit sub_open / text / sub_close.
+    md.inline.ruler.after("emphasis", "sub", tokenize)
+    # Render rules: turn the open/close tokens into the literal HTML tags.
+    md.add_render_rule("sub_open", sub_open)
+    md.add_render_rule("sub_close", sub_close)
diff --git a/mdit_py_plugins/subscript/port.yaml b/mdit_py_plugins/subscript/port.yaml
@@ -0,0 +1,9 @@
+- package: markdown-it-sub
+  commit: 422e93885b3c611234d602aa795f3d75a62cc93e
+  date: 5 Dec 2023
+  version: 3.0.0
+  changes:
+    - TODO - Some strikethrough and subscript combinations are not rendered
+      correctly in markdown-it either, but that can be fixed at a later stage,
+      perhaps in both markdown-it and markdown-it-py.
+      See `tests/fixtures/subscript_strikethrough.md` for examples.
diff --git a/tests/fixtures/subscript.md b/tests/fixtures/subscript.md
@@ -0,0 +1,77 @@
+.
+~foo\~
+.
+<p>~foo~</p>
+.
+
+.
+~foo bar~
+.
+<p>~foo bar~</p>
+.
+
+.
+~foo\ bar\ baz~
+.
+<p><sub>foo bar baz</sub></p>
+.
+
+.
+~\ foo\ ~
+.
+<p><sub> foo </sub></p>
+.
+
+.
+~foo\\\\\\\ bar~
+.
+<p><sub>foo\\\ bar</sub></p>
+.
+
+.
+~foo\\\\\\ bar~
+.
+<p>~foo\\\ bar~</p>
+.
+
+.
+**~foo~ bar**
+.
+<p><strong><sub>foo</sub> bar</strong></p>
+.
+
+
+coverage
+.
+*~f
+.
+<p>*~f</p>
+.
+
+Basic:
+.
+H~2~O
+.
+<p>H<sub>2</sub>O</p>
+.
+
+Spaces:
+.
+H~2 O~2
+.
+<p>H~2 O~2</p>
+.
+
+Escaped:
+.
+H\~2\~O
+.
+<p>H~2~O</p>
+.
+
+Nested:
+.
+a~b~c~d~e
+.
+<p>a<sub>b</sub>c<sub>d</sub>e</p>
+.
diff --git a/tests/fixtures/subscript_strikethrough.md b/tests/fixtures/subscript_strikethrough.md
@@ -0,0 +1,68 @@
+Strikethrough versus subscript:
+.
+~~strikethrough~~versus~subscript~
+.
+<p><s>strikethrough</s>versus<sub>subscript</sub></p>
+.
+
+Subscript in strikethrough (beginning):
+.
+~~~subscript~strikethrough~~
+This ends up being rendered as a code block, but that's expected.
+Hence, it has to be closed with `~~~`
+~~~
+Only then will the following text be rendered as it is intended.
+We cannot use `~~~subscript~strikethrough~~` at the beginning of a line.
+.
+<pre><code class="language-subscript~strikethrough~~">This ends up being rendered as a code block, but that's expected.
+Hence, it has to be closed with `~~~`
+</code></pre>
+<p>Only then will the following text be rendered as it is intended.
+We cannot use <code>~~~subscript~strikethrough~~</code> at the beginning of a line.</p>
+.
+
+Strikethrough in subscript (beginning):
+.
+~~~strikethrough~~subscript~
+This ends up being rendered as a code block, but that's expected.
+Hence, it has to be closed with `~~~`
+~~~
+Only then will the following text be rendered as it is intended.
+We cannot use `~~~strikethrough~~subscript~` at the beginning of a line.
+.
+<pre><code class="language-strikethrough~~subscript~">This ends up being rendered as a code block, but that's expected.
+Hence, it has to be closed with `~~~`
+</code></pre>
+<p>Only then will the following text be rendered as it is intended.
+We cannot use <code>~~~strikethrough~~subscript~</code> at the beginning of a line.</p>
+.
+
+Subscript in strikethrough (end):
+.
+~~strikethrough~subscript~~~
+.
+<p><s>strikethrough<sub>subscript</sub></s></p>
+.
+
+Strikethrough in subscript (end):
+.
+TODO: ~subscript~~strikethrough~~~
+.
+<p>TODO: <sub>subscript</sub><sub>strikethrough</sub>~~</p>
+.
+
+Subscript in strikethrough:
+.
+~~strikethrough~subscript~strikethrough~~
+.
+<p><s>strikethrough<sub>subscript</sub>strikethrough</s></p>
+.
+
+Strikethrough in subscript:
+.
+TODO: ~subscript~~strikethrough~~subscript~
+This should have been similar to *emphasised**strong**emphasised*.
+.
+<p>TODO: <sub>subscript</sub><sub>strikethrough</sub><sub>subscript</sub>
+This should have been similar to <em>emphasised<strong>strong</strong>emphasised</em>.</p>
+.
diff --git a/tests/test_subscript.py b/tests/test_subscript.py
@@ -0,0 +1,40 @@
+"""Tests for subscript plugin."""
+
+from pathlib import Path
+
+from markdown_it import MarkdownIt
+from markdown_it.utils import read_fixture_file
+import pytest
+
+from mdit_py_plugins.subscript import sub_plugin
+
+FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures", "subscript.md")
+STRIKETHROUGH_FIXTURE_PATH = Path(__file__).parent.joinpath(
+    "fixtures", "subscript_strikethrough.md"
+)
+
+
+@pytest.mark.parametrize("line,title,input,expected", read_fixture_file(FIXTURE_PATH))
+def test_all(line, title, input, expected):
+    """Tests for subscript plugin.
+
+    Runs once per case read from ``fixtures/subscript.md``: renders ``input``
+    with the ``commonmark`` preset plus ``sub_plugin`` and compares the result
+    with ``expected``, ignoring trailing whitespace on both sides.
+    """
+    md = MarkdownIt("commonmark").use(sub_plugin)
+    text = md.render(input)
+    try:
+        assert text.rstrip() == expected.rstrip()
+    except AssertionError:
+        # Show the full rendered output to ease debugging, then fail as usual.
+        print(text)
+        raise
+
+
+@pytest.mark.parametrize(
+    "line,title,input,expected", read_fixture_file(STRIKETHROUGH_FIXTURE_PATH)
+)
+def test_all_strikethrough(line, title, input, expected):
+    """Tests for subscript plugin with strikethrough enabled.
+
+    Runs once per case read from ``fixtures/subscript_strikethrough.md``:
+    renders ``input`` with the ``commonmark`` preset, the ``strikethrough``
+    rule enabled and ``sub_plugin`` applied, then compares the result with
+    ``expected``, ignoring trailing whitespace on both sides.
+    """
+    md = MarkdownIt("commonmark").enable("strikethrough").use(sub_plugin)
+    text = md.render(input)
+    try:
+        assert text.rstrip() == expected.rstrip()
+    except AssertionError:
+        # Show the full rendered output to ease debugging, then fail as usual.
+        print(text)
+        raise