From 6dc3dfadbbfd0a8a7fde63b80905e7a97c118183 Mon Sep 17 00:00:00 2001
From: Shinon <contakt@shinon71.moe>
Date: Mon, 17 Nov 2025 12:19:12 +0800
Subject: [PATCH 1/8] ruff format

---
 markdownify/__init__.py  | 567 +++++++++++++++++++++------------------
 markdownify/__init__.pyi |  17 +-
 markdownify/main.py      | 197 +++++++++-----
 3 files changed, 445 insertions(+), 336 deletions(-)
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 148d340..7df448b 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -5,77 +5,77 @@
 
 
 # General-purpose regex patterns
-re_convert_heading = re.compile(r'convert_h(\d+)')
-re_line_with_content = re.compile(r'^(.*)', flags=re.MULTILINE)
-re_whitespace = re.compile(r'[\t ]+')
-re_all_whitespace = re.compile(r'[\t \r\n]+')
-re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
-re_html_heading = re.compile(r'h(\d+)')
-re_pre_lstrip1 = re.compile(r'^ *\n')
-re_pre_rstrip1 = re.compile(r'\n *$')
-re_pre_lstrip = re.compile(r'^[ \n]*\n')
-re_pre_rstrip = re.compile(r'[ \n]*$')
+re_convert_heading = re.compile(r"convert_h(\d+)")
+re_line_with_content = re.compile(r"^(.*)", flags=re.MULTILINE)
+re_whitespace = re.compile(r"[\t ]+")
+re_all_whitespace = re.compile(r"[\t \r\n]+")
+re_newline_whitespace = re.compile(r"[\t \r\n]*[\r\n][\t \r\n]*")
+re_html_heading = re.compile(r"h(\d+)")
+re_pre_lstrip1 = re.compile(r"^ *\n")
+re_pre_rstrip1 = re.compile(r"\n *$")
+re_pre_lstrip = re.compile(r"^[ \n]*\n")
+re_pre_rstrip = re.compile(r"[ \n]*$")
 
 # Pattern for creating convert_<tag> function names from tag names
-re_make_convert_fn_name = re.compile(r'[\[\]:-]')
+re_make_convert_fn_name = re.compile(r"[\[\]:-]")
 
 # Extract (leading_nl, content, trailing_nl) from a string
 # (functionally equivalent to r'^(\n*)(.*?)(\n*)$', but greedy is faster than reluctant here)
-re_extract_newlines = re.compile(r'^(\n*)((?:.*[^\n])?)(\n*)$', flags=re.DOTALL)
+re_extract_newlines = re.compile(r"^(\n*)((?:.*[^\n])?)(\n*)$", flags=re.DOTALL)
 
 # Escape miscellaneous special Markdown characters
-re_escape_misc_chars = re.compile(r'([]\\&<`[>~=+|])')
+re_escape_misc_chars = re.compile(r"([]\\&<`[>~=+|])")
 
 # Escape sequence of one or more consecutive '-', preceded
 # and followed by whitespace or start/end of fragment, as it
 # might be confused with an underline of a header, or with a
 # list marker
-re_escape_misc_dash_sequences = re.compile(r'(\s|^)(-+(?:\s|$))')
+re_escape_misc_dash_sequences = re.compile(r"(\s|^)(-+(?:\s|$))")
 
 # Escape sequence of up to six consecutive '#', preceded
 # and followed by whitespace or start/end of fragment, as
 # it might be confused with an ATX heading
-re_escape_misc_hashes = re.compile(r'(\s|^)(#{1,6}(?:\s|$))')
+re_escape_misc_hashes = re.compile(r"(\s|^)(#{1,6}(?:\s|$))")
 
 # Escape '.' or ')' preceded by up to nine digits, as it might be
 # confused with a list item
-re_escape_misc_list_items = re.compile(r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))')
+re_escape_misc_list_items = re.compile(r"((?:\s|^)[0-9]{1,9})([.)](?:\s|$))")
 
 # Find consecutive backtick sequences in a string
-re_backtick_runs = re.compile(r'`+')
+re_backtick_runs = re.compile(r"`+")
 
 # Heading styles
-ATX = 'atx'
-ATX_CLOSED = 'atx_closed'
-UNDERLINED = 'underlined'
+ATX = "atx"
+ATX_CLOSED = "atx_closed"
+UNDERLINED = "underlined"
 SETEXT = UNDERLINED
 
 # Newline style
-SPACES = 'spaces'
-BACKSLASH = 'backslash'
+SPACES = "spaces"
+BACKSLASH = "backslash"
 
 # Strong and emphasis style
-ASTERISK = '*'
-UNDERSCORE = '_'
+ASTERISK = "*"
+UNDERSCORE = "_"
 
 # Document/pre strip styles
-LSTRIP = 'lstrip'
-RSTRIP = 'rstrip'
-STRIP = 'strip'
-STRIP_ONE = 'strip_one'
+LSTRIP = "lstrip"
+RSTRIP = "rstrip"
+STRIP = "strip"
+STRIP_ONE = "strip_one"
 
 
 def strip1_pre(text):
     """Strip one leading and trailing newline from a <pre> string."""
-    text = re_pre_lstrip1.sub('', text)
-    text = re_pre_rstrip1.sub('', text)
+    text = re_pre_lstrip1.sub("", text)
+    text = re_pre_rstrip1.sub("", text)
     return text
 
 
 def strip_pre(text):
     """Strip all leading and trailing newlines from a <pre> string."""
-    text = re_pre_lstrip.sub('', text)
-    text = re_pre_rstrip.sub('', text)
+    text = re_pre_lstrip.sub("", text)
+    text = re_pre_rstrip.sub("", text)
     return text
 
 
@@ -86,8 +86,8 @@ def chomp(text):
     This function is used to prevent conversions like
         <b> foo</b> => ** foo**
     """
-    prefix = ' ' if text and text[0] == ' ' else ''
-    suffix = ' ' if text and text[-1] == ' ' else ''
+    prefix = " " if text and text[0] == " " else ""
+    suffix = " " if text and text[-1] == " " else ""
     text = text.strip()
     return (prefix, suffix, text)
 
@@ -100,23 +100,25 @@ def abstract_inline_conversion(markup_fn):
     the text if it looks like an HTML tag. markup_fn is necessary to allow for
     references to self.strong_em_symbol etc.
     """
+
     def implementation(self, el, text, parent_tags):
         markup_prefix = markup_fn(self)
-        if markup_prefix.startswith('<') and markup_prefix.endswith('>'):
-            markup_suffix = '</' + markup_prefix[1:]
+        if markup_prefix.startswith("<") and markup_prefix.endswith(">"):
+            markup_suffix = "</" + markup_prefix[1:]
         else:
             markup_suffix = markup_prefix
-        if '_noformat' in parent_tags:
+        if "_noformat" in parent_tags:
             return text
         prefix, suffix, text = chomp(text)
         if not text:
-            return ''
-        return '%s%s%s%s%s' % (prefix, markup_prefix, text, markup_suffix, suffix)
+            return ""
+        return "%s%s%s%s%s" % (prefix, markup_prefix, text, markup_suffix, suffix)
+
     return implementation
 
 
 def _todict(obj):
-    return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_'))
+    return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith("_"))
 
 
 def should_remove_whitespace_inside(el):
@@ -125,17 +127,31 @@ def should_remove_whitespace_inside(el):
         return False
     if re_html_heading.match(el.name) is not None:
         return True
-    return el.name in ('p', 'blockquote',
-                       'article', 'div', 'section',
-                       'ol', 'ul', 'li',
-                       'dl', 'dt', 'dd',
-                       'table', 'thead', 'tbody', 'tfoot',
-                       'tr', 'td', 'th')
+    return el.name in (
+        "p",
+        "blockquote",
+        "article",
+        "div",
+        "section",
+        "ol",
+        "ul",
+        "li",
+        "dl",
+        "dt",
+        "dd",
+        "table",
+        "thead",
+        "tbody",
+        "tfoot",
+        "tr",
+        "td",
+        "th",
+    )
 
 
 def should_remove_whitespace_outside(el):
     """Return to remove whitespace immediately outside a block-level element."""
-    return should_remove_whitespace_inside(el) or (el and el.name == 'pre')
+    return should_remove_whitespace_inside(el) or (el and el.name == "pre")
 
 
 def _is_block_content_element(el):
@@ -150,7 +166,7 @@ def _is_block_content_element(el):
     elif isinstance(el, (Comment, Doctype)):
         return False  # (subclasses of NavigableString, must test first)
     elif isinstance(el, NavigableString):
-        return el.strip() != ''
+        return el.strip() != ""
     else:
         return False
 
@@ -176,9 +192,9 @@ def _next_block_content_sibling(el):
 class MarkdownConverter(object):
     class DefaultOptions:
         autolinks = True
-        bs4_options = 'html.parser'
-        bullets = '*+-'  # An iterable of bullet types.
-        code_language = ''
+        bs4_options = "html.parser"
+        bullets = "*+-"  # An iterable of bullet types.
+        code_language = ""
         code_language_callback = None
         convert = None
         default_title = False
@@ -192,8 +208,8 @@ class DefaultOptions:
         strip_document = STRIP
         strip_pre = STRIP
         strong_em_symbol = ASTERISK
-        sub_symbol = ''
-        sup_symbol = ''
+        sub_symbol = ""
+        sup_symbol = ""
         table_infer_header = False
         wrap = False
         wrap_width = 80
@@ -207,19 +223,20 @@ def __init__(self, **options):
         self.options = _todict(self.DefaultOptions)
         self.options.update(_todict(self.Options))
         self.options.update(options)
-        if self.options['strip'] is not None and self.options['convert'] is not None:
-            raise ValueError('You may specify either tags to strip or tags to'
-                             ' convert, but not both.')
+        if self.options["strip"] is not None and self.options["convert"] is not None:
+            raise ValueError(
+                "You may specify either tags to strip or tags to convert, but not both."
+            )
 
         # If a string or list is passed to bs4_options, assume it is a 'features' specification
-        if not isinstance(self.options['bs4_options'], dict):
-            self.options['bs4_options'] = {'features': self.options['bs4_options']}
+        if not isinstance(self.options["bs4_options"], dict):
+            self.options["bs4_options"] = {"features": self.options["bs4_options"]}
 
         # Initialize the conversion function cache
         self.convert_fn_cache = {}
 
     def convert(self, html):
-        soup = BeautifulSoup(html, **self.options['bs4_options'])
+        soup = BeautifulSoup(html, **self.options["bs4_options"])
         return self.convert_soup(soup)
 
     def convert_soup(self, soup):
@@ -249,13 +266,17 @@ def _can_ignore(el):
                 # (subclasses of NavigableString, must test first)
                 return True
             elif isinstance(el, NavigableString):
-                if six.text_type(el).strip() != '':
+                if six.text_type(el).strip() != "":
                     # Non-whitespace text nodes are always processed.
                     return False
-                elif should_remove_inside and (not el.previous_sibling or not el.next_sibling):
+                elif should_remove_inside and (
+                    not el.previous_sibling or not el.next_sibling
+                ):
                     # Inside block elements (excluding <pre>), ignore adjacent whitespace elements.
                     return True
-                elif should_remove_whitespace_outside(el.previous_sibling) or should_remove_whitespace_outside(el.next_sibling):
+                elif should_remove_whitespace_outside(
+                    el.previous_sibling
+                ) or should_remove_whitespace_outside(el.next_sibling):
                     # Outside block elements (including <pre>), ignore adjacent whitespace elements.
                     return True
                 else:
@@ -263,7 +284,7 @@ def _can_ignore(el):
             elif el is None:
                 return True
             else:
-                raise ValueError('Unexpected element type: %s' % type(el))
+                raise ValueError("Unexpected element type: %s" % type(el))
 
         children_to_convert = [el for el in node.children if not _can_ignore(el)]
 
@@ -275,13 +296,13 @@ def _can_ignore(el):
         # if this tag is a heading or table cell, add an '_inline' parent pseudo-tag
         if (
             re_html_heading.match(node.name) is not None  # headings
-            or node.name in {'td', 'th'}  # table cells
+            or node.name in {"td", "th"}  # table cells
         ):
-            parent_tags_for_children.add('_inline')
+            parent_tags_for_children.add("_inline")
 
         # if this tag is a preformatted element, add a '_noformat' parent pseudo-tag
-        if node.name in {'pre', 'code', 'kbd', 'samp'}:
-            parent_tags_for_children.add('_noformat')
+        if node.name in {"pre", "code", "kbd", "samp"}:
+            parent_tags_for_children.add("_noformat")
 
         # Convert the children elements into a list of result strings.
         child_strings = [
@@ -293,22 +314,26 @@ def _can_ignore(el):
         child_strings = [s for s in child_strings if s]
 
         # Collapse newlines at child element boundaries, if needed.
-        if node.name == 'pre' or node.find_parent('pre'):
+        if node.name == "pre" or node.find_parent("pre"):
             # Inside <pre> blocks, do not collapse newlines.
             pass
         else:
             # Collapse newlines at child element boundaries.
-            updated_child_strings = ['']  # so the first lookback works
+            updated_child_strings = [""]  # so the first lookback works
             for child_string in child_strings:
                 # Separate the leading/trailing newlines from the content.
-                leading_nl, content, trailing_nl = re_extract_newlines.match(child_string).groups()
+                leading_nl, content, trailing_nl = re_extract_newlines.match(
+                    child_string
+                ).groups()
 
                 # If the last child had trailing newlines and this child has leading newlines,
                 # use the larger newline count, limited to 2.
                 if updated_child_strings[-1] and leading_nl:
-                    prev_trailing_nl = updated_child_strings.pop()  # will be replaced by the collapsed value
+                    prev_trailing_nl = (
+                        updated_child_strings.pop()
+                    )  # will be replaced by the collapsed value
                     num_newlines = min(2, max(len(prev_trailing_nl), len(leading_nl)))
-                    leading_nl = '\n' * num_newlines
+                    leading_nl = "\n" * num_newlines
 
                 # Add the results to the updated child string list.
                 updated_child_strings.extend([leading_nl, content, trailing_nl])
@@ -316,7 +341,7 @@ def _can_ignore(el):
             child_strings = updated_child_strings
 
         # Join all child text strings into a single string.
-        text = ''.join(child_strings)
+        text = "".join(child_strings)
 
         # apply this tag's final conversion function
         convert_fn = self.get_conv_fn_cached(node.name)
@@ -327,16 +352,18 @@ def _can_ignore(el):
 
     def convert__document_(self, el, text, parent_tags):
         """Final document-level formatting for BeautifulSoup object (node.name == "[document]")"""
-        if self.options['strip_document'] == LSTRIP:
-            text = text.lstrip('\n')  # remove leading separation newlines
-        elif self.options['strip_document'] == RSTRIP:
-            text = text.rstrip('\n')  # remove trailing separation newlines
-        elif self.options['strip_document'] == STRIP:
-            text = text.strip('\n')  # remove leading and trailing separation newlines
-        elif self.options['strip_document'] is None:
+        if self.options["strip_document"] == LSTRIP:
+            text = text.lstrip("\n")  # remove leading separation newlines
+        elif self.options["strip_document"] == RSTRIP:
+            text = text.rstrip("\n")  # remove trailing separation newlines
+        elif self.options["strip_document"] == STRIP:
+            text = text.strip("\n")  # remove leading and trailing separation newlines
+        elif self.options["strip_document"] is None:
             pass  # leave leading and trailing separation newlines as-is
         else:
-            raise ValueError('Invalid value for strip_document: %s' % self.options['strip_document'])
+            raise ValueError(
+                "Invalid value for strip_document: %s" % self.options["strip_document"]
+            )
 
         return text
 
@@ -345,30 +372,30 @@ def process_text(self, el, parent_tags=None):
         if parent_tags is None:
             parent_tags = set()
 
-        text = six.text_type(el) or ''
+        text = six.text_type(el) or ""
 
         # normalize whitespace if we're not inside a preformatted element
-        if 'pre' not in parent_tags:
-            if self.options['wrap']:
-                text = re_all_whitespace.sub(' ', text)
+        if "pre" not in parent_tags:
+            if self.options["wrap"]:
+                text = re_all_whitespace.sub(" ", text)
             else:
-                text = re_newline_whitespace.sub('\n', text)
-                text = re_whitespace.sub(' ', text)
+                text = re_newline_whitespace.sub("\n", text)
+                text = re_whitespace.sub(" ", text)
 
         # escape special characters if we're not inside a preformatted or code element
-        if '_noformat' not in parent_tags:
+        if "_noformat" not in parent_tags:
             text = self.escape(text, parent_tags)
 
         # remove leading whitespace at the start or just after a
         # block-level element; remove traliing whitespace at the end
         # or just before a block-level element.
-        if (should_remove_whitespace_outside(el.previous_sibling)
-                or (should_remove_whitespace_inside(el.parent)
-                    and not el.previous_sibling)):
-            text = text.lstrip(' \t\r\n')
-        if (should_remove_whitespace_outside(el.next_sibling)
-                or (should_remove_whitespace_inside(el.parent)
-                    and not el.next_sibling)):
+        if should_remove_whitespace_outside(el.previous_sibling) or (
+            should_remove_whitespace_inside(el.parent) and not el.previous_sibling
+        ):
+            text = text.lstrip(" \t\r\n")
+        if should_remove_whitespace_outside(el.next_sibling) or (
+            should_remove_whitespace_inside(el.parent) and not el.next_sibling
+        ):
             text = text.rstrip()
 
         return text
@@ -400,15 +427,17 @@ def get_conv_fn(self, tag_name):
         match = re_html_heading.match(tag_name)
         if match:
             n = int(match.group(1))  # get value of N from <hN>
-            return lambda el, text, parent_tags: self.convert_hN(n, el, text, parent_tags)
+            return lambda el, text, parent_tags: self.convert_hN(
+                n, el, text, parent_tags
+            )
 
         # No conversion function was found
         return None
 
     def should_convert_tag(self, tag):
         """Given a tag name, return whether to convert based on strip/convert options."""
-        strip = self.options['strip']
-        convert = self.options['convert']
+        strip = self.options["strip"]
+        convert = self.options["convert"]
         if strip is not None:
             return tag not in strip
         elif convert is not None:
@@ -418,123 +447,137 @@ def should_convert_tag(self, tag):
 
     def escape(self, text, parent_tags):
         if not text:
-            return ''
-        if self.options['escape_misc']:
-            text = re_escape_misc_chars.sub(r'\\\1', text)
-            text = re_escape_misc_dash_sequences.sub(r'\1\\\2', text)
-            text = re_escape_misc_hashes.sub(r'\1\\\2', text)
-            text = re_escape_misc_list_items.sub(r'\1\\\2', text)
-
-        if self.options['escape_asterisks']:
-            text = text.replace('*', r'\*')
-        if self.options['escape_underscores']:
-            text = text.replace('_', r'\_')
+            return ""
+        if self.options["escape_misc"]:
+            text = re_escape_misc_chars.sub(r"\\\1", text)
+            text = re_escape_misc_dash_sequences.sub(r"\1\\\2", text)
+            text = re_escape_misc_hashes.sub(r"\1\\\2", text)
+            text = re_escape_misc_list_items.sub(r"\1\\\2", text)
+
+        if self.options["escape_asterisks"]:
+            text = text.replace("*", r"\*")
+        if self.options["escape_underscores"]:
+            text = text.replace("_", r"\_")
         return text
 
     def underline(self, text, pad_char):
-        text = (text or '').rstrip()
-        return '\n\n%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
+        text = (text or "").rstrip()
+        return "\n\n%s\n%s\n\n" % (text, pad_char * len(text)) if text else ""
 
     def convert_a(self, el, text, parent_tags):
-        if '_noformat' in parent_tags:
+        if "_noformat" in parent_tags:
             return text
         prefix, suffix, text = chomp(text)
         if not text:
-            return ''
-        href = el.get('href')
-        title = el.get('title')
+            return ""
+        href = el.get("href")
+        title = el.get("title")
         # For the replacement see #29: text nodes underscores are escaped
-        if (self.options['autolinks']
-                and text.replace(r'\_', '_') == href
-                and not title
-                and not self.options['default_title']):
+        if (
+            self.options["autolinks"]
+            and text.replace(r"\_", "_") == href
+            and not title
+            and not self.options["default_title"]
+        ):
             # Shortcut syntax
-            return '<%s>' % href
-        if self.options['default_title'] and not title:
+            return "<%s>" % href
+        if self.options["default_title"] and not title:
             title = href
-        title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
-        return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text
+        title_part = ' "%s"' % title.replace('"', r"\"") if title else ""
+        return (
+            "%s[%s](%s%s)%s" % (prefix, text, href, title_part, suffix)
+            if href
+            else text
+        )
 
-    convert_b = abstract_inline_conversion(lambda self: 2 * self.options['strong_em_symbol'])
+    convert_b = abstract_inline_conversion(
+        lambda self: 2 * self.options["strong_em_symbol"]
+    )
 
     def convert_blockquote(self, el, text, parent_tags):
         # handle some early-exit scenarios
-        text = (text or '').strip(' \t\r\n')
-        if '_inline' in parent_tags:
-            return ' ' + text + ' '
+        text = (text or "").strip(" \t\r\n")
+        if "_inline" in parent_tags:
+            return " " + text + " "
         if not text:
             return "\n"
 
         # indent lines with blockquote marker
         def _indent_for_blockquote(match):
             line_content = match.group(1)
-            return '> ' + line_content if line_content else '>'
+            return "> " + line_content if line_content else ">"
+
         text = re_line_with_content.sub(_indent_for_blockquote, text)
 
-        return '\n' + text + '\n\n'
+        return "\n" + text + "\n\n"
 
     def convert_br(self, el, text, parent_tags):
-        if '_inline' in parent_tags:
-            return ' '
+        if "_inline" in parent_tags:
+            return " "
 
-        if self.options['newline_style'].lower() == BACKSLASH:
-            return '\\\n'
+        if self.options["newline_style"].lower() == BACKSLASH:
+            return "\\\n"
         else:
-            return '  \n'
+            return "  \n"
 
     def convert_code(self, el, text, parent_tags):
-        if '_noformat' in parent_tags:
+        if "_noformat" in parent_tags:
             return text
 
         prefix, suffix, text = chomp(text)
         if not text:
-            return ''
+            return ""
 
         # Find the maximum number of consecutive backticks in the text, then
         # delimit the code span with one more backtick than that
-        max_backticks = max((len(match) for match in re.findall(re_backtick_runs, text)), default=0)
-        markup_delimiter = '`' * (max_backticks + 1)
+        max_backticks = max(
+            (len(match) for match in re.findall(re_backtick_runs, text)), default=0
+        )
+        markup_delimiter = "`" * (max_backticks + 1)
 
         # If the maximum number of backticks is greater than zero, add a space
         # to avoid interpretation of inside backticks as literals
         if max_backticks > 0:
             text = " " + text + " "
 
-        return '%s%s%s%s%s' % (prefix, markup_delimiter, text, markup_delimiter, suffix)
+        return "%s%s%s%s%s" % (prefix, markup_delimiter, text, markup_delimiter, suffix)
 
-    convert_del = abstract_inline_conversion(lambda self: '~~')
+    convert_del = abstract_inline_conversion(lambda self: "~~")
 
     def convert_div(self, el, text, parent_tags):
-        if '_inline' in parent_tags:
-            return ' ' + text.strip() + ' '
+        if "_inline" in parent_tags:
+            return " " + text.strip() + " "
         text = text.strip()
-        return '\n\n%s\n\n' % text if text else ''
+        return "\n\n%s\n\n" % text if text else ""
 
     convert_article = convert_div
 
     convert_section = convert_div
 
-    convert_em = abstract_inline_conversion(lambda self: self.options['strong_em_symbol'])
+    convert_em = abstract_inline_conversion(
+        lambda self: self.options["strong_em_symbol"]
+    )
 
     convert_kbd = convert_code
 
     def convert_dd(self, el, text, parent_tags):
-        text = (text or '').strip()
-        if '_inline' in parent_tags:
-            return ' ' + text + ' '
+        text = (text or "").strip()
+        if "_inline" in parent_tags:
+            return " " + text + " "
         if not text:
-            return '\n'
+            return "\n"
 
         # indent definition content lines by four spaces
         def _indent_for_dd(match):
             line_content = match.group(1)
-            return '    ' + line_content if line_content else ''
+            return "    " + line_content if line_content else ""
+
         text = re_line_with_content.sub(_indent_for_dd, text)
 
         # insert definition marker into first-line indent whitespace
-        text = ':' + text[1:]
+        text = ":" + text[1:]
 
-        return '%s\n' % text
+        return "%s\n" % text
 
     # definition lists are formatted as follows:
     #   https://pandoc.org/MANUAL.html#definition-lists
@@ -543,175 +586,183 @@ def _indent_for_dd(match):
 
     def convert_dt(self, el, text, parent_tags):
         # remove newlines from term text
-        text = (text or '').strip()
-        text = re_all_whitespace.sub(' ', text)
-        if '_inline' in parent_tags:
-            return ' ' + text + ' '
+        text = (text or "").strip()
+        text = re_all_whitespace.sub(" ", text)
+        if "_inline" in parent_tags:
+            return " " + text + " "
         if not text:
-            return '\n'
+            return "\n"
 
         # TODO - format consecutive <dt> elements as directly adjacent lines):
         #   https://michelf.ca/projects/php-markdown/extra/#def-list
 
-        return '\n\n%s\n' % text
+        return "\n\n%s\n" % text
 
     def convert_hN(self, n, el, text, parent_tags):
         # convert_hN() converts <hN> tags, where N is any integer
-        if '_inline' in parent_tags:
+        if "_inline" in parent_tags:
             return text
 
         # Markdown does not support heading depths of n > 6
         n = max(1, min(6, n))
 
-        style = self.options['heading_style'].lower()
+        style = self.options["heading_style"].lower()
         text = text.strip()
         if style == UNDERLINED and n <= 2:
-            line = '=' if n == 1 else '-'
+            line = "=" if n == 1 else "-"
             return self.underline(text, line)
-        text = re_all_whitespace.sub(' ', text)
-        hashes = '#' * n
+        text = re_all_whitespace.sub(" ", text)
+        hashes = "#" * n
         if style == ATX_CLOSED:
-            return '\n\n%s %s %s\n\n' % (hashes, text, hashes)
-        return '\n\n%s %s\n\n' % (hashes, text)
+            return "\n\n%s %s %s\n\n" % (hashes, text, hashes)
+        return "\n\n%s %s\n\n" % (hashes, text)
 
     def convert_hr(self, el, text, parent_tags):
-        return '\n\n---\n\n'
+        return "\n\n---\n\n"
 
     convert_i = convert_em
 
     def convert_img(self, el, text, parent_tags):
-        alt = el.attrs.get('alt', None) or ''
-        src = el.attrs.get('src', None) or ''
-        title = el.attrs.get('title', None) or ''
-        title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
-        if ('_inline' in parent_tags
-                and el.parent.name not in self.options['keep_inline_images_in']):
+        alt = el.attrs.get("alt", None) or ""
+        src = el.attrs.get("src", None) or ""
+        title = el.attrs.get("title", None) or ""
+        title_part = ' "%s"' % title.replace('"', r"\"") if title else ""
+        if (
+            "_inline" in parent_tags
+            and el.parent.name not in self.options["keep_inline_images_in"]
+        ):
             return alt
 
-        return '![%s](%s%s)' % (alt, src, title_part)
+        return "![%s](%s%s)" % (alt, src, title_part)
 
     def convert_video(self, el, text, parent_tags):
-        if ('_inline' in parent_tags
-                and el.parent.name not in self.options['keep_inline_images_in']):
+        if (
+            "_inline" in parent_tags
+            and el.parent.name not in self.options["keep_inline_images_in"]
+        ):
             return text
-        src = el.attrs.get('src', None) or ''
+        src = el.attrs.get("src", None) or ""
         if not src:
-            sources = el.find_all('source', attrs={'src': True})
+            sources = el.find_all("source", attrs={"src": True})
             if sources:
-                src = sources[0].attrs.get('src', None) or ''
-        poster = el.attrs.get('poster', None) or ''
+                src = sources[0].attrs.get("src", None) or ""
+        poster = el.attrs.get("poster", None) or ""
         if src and poster:
-            return '[![%s](%s)](%s)' % (text, poster, src)
+            return "[![%s](%s)](%s)" % (text, poster, src)
         if src:
-            return '[%s](%s)' % (text, src)
+            return "[%s](%s)" % (text, src)
         if poster:
-            return '![%s](%s)' % (text, poster)
+            return "![%s](%s)" % (text, poster)
         return text
 
     def convert_list(self, el, text, parent_tags):
-
         # Converting a list to inline is undefined.
         # Ignoring inline conversion parents for list.
 
         before_paragraph = False
         next_sibling = _next_block_content_sibling(el)
-        if next_sibling and next_sibling.name not in ['ul', 'ol']:
+        if next_sibling and next_sibling.name not in ["ul", "ol"]:
             before_paragraph = True
-        if 'li' in parent_tags:
+        if "li" in parent_tags:
             # remove trailing newline if we're in a nested list
-            return '\n' + text.rstrip()
-        return '\n\n' + text + ('\n' if before_paragraph else '')
+            return "\n" + text.rstrip()
+        return "\n\n" + text + ("\n" if before_paragraph else "")
 
     convert_ul = convert_list
     convert_ol = convert_list
 
     def convert_li(self, el, text, parent_tags):
         # handle some early-exit scenarios
-        text = (text or '').strip()
+        text = (text or "").strip()
         if not text:
             return "\n"
 
         # determine list item bullet character to use
         parent = el.parent
-        if parent is not None and parent.name == 'ol':
+        if parent is not None and parent.name == "ol":
             if parent.get("start") and str(parent.get("start")).isnumeric():
                 start = int(parent.get("start"))
             else:
                 start = 1
-            bullet = '%s.' % (start + len(el.find_previous_siblings('li')))
+            bullet = "%s." % (start + len(el.find_previous_siblings("li")))
         else:
             depth = -1
             while el:
-                if el.name == 'ul':
+                if el.name == "ul":
                     depth += 1
                 el = el.parent
-            bullets = self.options['bullets']
+            bullets = self.options["bullets"]
             bullet = bullets[depth % len(bullets)]
-        bullet = bullet + ' '
+        bullet = bullet + " "
         bullet_width = len(bullet)
-        bullet_indent = ' ' * bullet_width
+        bullet_indent = " " * bullet_width
 
         # indent content lines by bullet width
         def _indent_for_li(match):
             line_content = match.group(1)
-            return bullet_indent + line_content if line_content else ''
+            return bullet_indent + line_content if line_content else ""
+
         text = re_line_with_content.sub(_indent_for_li, text)
 
         # insert bullet into first-line indent whitespace
         text = bullet + text[bullet_width:]
 
-        return '%s\n' % text
+        return "%s\n" % text
 
     def convert_p(self, el, text, parent_tags):
-        if '_inline' in parent_tags:
-            return ' ' + text.strip(' \t\r\n') + ' '
-        text = text.strip(' \t\r\n')
-        if self.options['wrap']:
+        if "_inline" in parent_tags:
+            return " " + text.strip(" \t\r\n") + " "
+        text = text.strip(" \t\r\n")
+        if self.options["wrap"]:
             # Preserve newlines (and preceding whitespace) resulting
             # from <br> tags.  Newlines in the input have already been
             # replaced by spaces.
-            if self.options['wrap_width'] is not None:
-                lines = text.split('\n')
+            if self.options["wrap_width"] is not None:
+                lines = text.split("\n")
                 new_lines = []
                 for line in lines:
-                    line = line.lstrip(' \t\r\n')
+                    line = line.lstrip(" \t\r\n")
                     line_no_trailing = line.rstrip()
-                    trailing = line[len(line_no_trailing):]
-                    line = fill(line,
-                                width=self.options['wrap_width'],
-                                break_long_words=False,
-                                break_on_hyphens=False)
+                    trailing = line[len(line_no_trailing) :]
+                    line = fill(
+                        line,
+                        width=self.options["wrap_width"],
+                        break_long_words=False,
+                        break_on_hyphens=False,
+                    )
                     new_lines.append(line + trailing)
-                text = '\n'.join(new_lines)
-        return '\n\n%s\n\n' % text if text else ''
+                text = "\n".join(new_lines)
+        return "\n\n%s\n\n" % text if text else ""
 
     def convert_pre(self, el, text, parent_tags):
         if not text:
-            return ''
-        code_language = self.options['code_language']
+            return ""
+        code_language = self.options["code_language"]
 
-        if self.options['code_language_callback']:
-            code_language = self.options['code_language_callback'](el) or code_language
+        if self.options["code_language_callback"]:
+            code_language = self.options["code_language_callback"](el) or code_language
 
-        if self.options['strip_pre'] == STRIP:
+        if self.options["strip_pre"] == STRIP:
             text = strip_pre(text)  # remove all leading/trailing newlines
-        elif self.options['strip_pre'] == STRIP_ONE:
+        elif self.options["strip_pre"] == STRIP_ONE:
             text = strip1_pre(text)  # remove one leading/trailing newline
-        elif self.options['strip_pre'] is None:
+        elif self.options["strip_pre"] is None:
             pass  # leave leading and trailing newlines as-is
         else:
-            raise ValueError('Invalid value for strip_pre: %s' % self.options['strip_pre'])
+            raise ValueError(
+                "Invalid value for strip_pre: %s" % self.options["strip_pre"]
+            )
 
-        return '\n\n```%s\n%s\n```\n\n' % (code_language, text)
+        return "\n\n```%s\n%s\n```\n\n" % (code_language, text)
 
     def convert_q(self, el, text, parent_tags):
         return '"' + text + '"'
 
     def convert_script(self, el, text, parent_tags):
-        return ''
+        return ""
 
     def convert_style(self, el, text, parent_tags):
-        return ''
+        return ""
 
     convert_s = convert_del
 
@@ -719,75 +770,75 @@ def convert_style(self, el, text, parent_tags):
 
     convert_samp = convert_code
 
-    convert_sub = abstract_inline_conversion(lambda self: self.options['sub_symbol'])
+    convert_sub = abstract_inline_conversion(lambda self: self.options["sub_symbol"])
 
-    convert_sup = abstract_inline_conversion(lambda self: self.options['sup_symbol'])
+    convert_sup = abstract_inline_conversion(lambda self: self.options["sup_symbol"])
 
     def convert_table(self, el, text, parent_tags):
-        return '\n\n' + text.strip() + '\n\n'
+        return "\n\n" + text.strip() + "\n\n"
 
     def convert_caption(self, el, text, parent_tags):
-        return text.strip() + '\n\n'
+        return text.strip() + "\n\n"
 
     def convert_figcaption(self, el, text, parent_tags):
-        return '\n\n' + text.strip() + '\n\n'
+        return "\n\n" + text.strip() + "\n\n"
 
     def convert_td(self, el, text, parent_tags):
         colspan = 1
-        if 'colspan' in el.attrs and el['colspan'].isdigit():
-            colspan = max(1, min(1000, int(el['colspan'])))
-        return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
+        if "colspan" in el.attrs and el["colspan"].isdigit():
+            colspan = max(1, min(1000, int(el["colspan"])))
+        return " " + text.strip().replace("\n", " ") + " |" * colspan
 
     def convert_th(self, el, text, parent_tags):
         colspan = 1
-        if 'colspan' in el.attrs and el['colspan'].isdigit():
-            colspan = max(1, min(1000, int(el['colspan'])))
-        return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
+        if "colspan" in el.attrs and el["colspan"].isdigit():
+            colspan = max(1, min(1000, int(el["colspan"])))
+        return " " + text.strip().replace("\n", " ") + " |" * colspan
 
     def convert_tr(self, el, text, parent_tags):
-        cells = el.find_all(['td', 'th'])
+        cells = el.find_all(["td", "th"])
         is_first_row = el.find_previous_sibling() is None
-        is_headrow = (
-            all([cell.name == 'th' for cell in cells])
-            or (el.parent.name == 'thead'
-                # avoid multiple tr in thead
-                and len(el.parent.find_all('tr')) == 1)
+        is_headrow = all([cell.name == "th" for cell in cells]) or (
+            el.parent.name == "thead"
+            # avoid multiple tr in thead
+            and len(el.parent.find_all("tr")) == 1
         )
-        is_head_row_missing = (
-            (is_first_row and not el.parent.name == 'tbody')
-            or (is_first_row and el.parent.name == 'tbody' and len(el.parent.parent.find_all(['thead'])) < 1)
+        is_head_row_missing = (is_first_row and not el.parent.name == "tbody") or (
+            is_first_row
+            and el.parent.name == "tbody"
+            and len(el.parent.parent.find_all(["thead"])) < 1
         )
-        overline = ''
-        underline = ''
+        overline = ""
+        underline = ""
         full_colspan = 0
         for cell in cells:
-            if 'colspan' in cell.attrs and cell['colspan'].isdigit():
-                full_colspan += max(1, min(1000, int(cell['colspan'])))
+            if "colspan" in cell.attrs and cell["colspan"].isdigit():
+                full_colspan += max(1, min(1000, int(cell["colspan"])))
             else:
                 full_colspan += 1
-        if ((is_headrow
-             or (is_head_row_missing
-                 and self.options['table_infer_header']))
-                and is_first_row):
+        if (
+            is_headrow or (is_head_row_missing and self.options["table_infer_header"])
+        ) and is_first_row:
             # first row and:
             # - is headline or
             # - headline is missing and header inference is enabled
             # print headline underline
-            underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
-        elif ((is_head_row_missing
-               and not self.options['table_infer_header'])
-              or (is_first_row
-                  and (el.parent.name == 'table'
-                       or (el.parent.name == 'tbody'
-                           and not el.parent.find_previous_sibling())))):
+            underline += "| " + " | ".join(["---"] * full_colspan) + " |" + "\n"
+        elif (is_head_row_missing and not self.options["table_infer_header"]) or (
+            is_first_row
+            and (
+                el.parent.name == "table"
+                or (el.parent.name == "tbody" and not el.parent.find_previous_sibling())
+            )
+        ):
             # headline is missing and header inference is disabled or:
             # first row, not headline, and:
             #  - the parent is table or
             #  - the parent is tbody at the beginning of a table.
             # print empty headline above this row
-            overline += '| ' + ' | '.join([''] * full_colspan) + ' |' + '\n'
-            overline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
-        return overline + '|' + text + '\n' + underline
+            overline += "| " + " | ".join([""] * full_colspan) + " |" + "\n"
+            overline += "| " + " | ".join(["---"] * full_colspan) + " |" + "\n"
+        return overline + "|" + text + "\n" + underline
 
 
 def markdownify(html, **options):
diff --git a/markdownify/__init__.pyi b/markdownify/__init__.pyi
index 5f9b852..ccb587f 100644
--- a/markdownify/__init__.pyi
+++ b/markdownify/__init__.pyi
@@ -14,7 +14,6 @@ RSTRIP: str
 STRIP: str
 STRIP_ONE: str
 
-
 def markdownify(
     html: str,
     autolinks: bool = ...,
@@ -41,7 +40,6 @@ def markdownify(
     wrap_width: int = ...,
 ) -> str: ...
 
-
 class MarkdownConverter:
     def __init__(
         self,
@@ -49,7 +47,9 @@ class MarkdownConverter:
         bs4_options: str = ...,
         bullets: str = ...,
         code_language: str = ...,
-        code_language_callback: Union[Callable[[Incomplete], Union[str, None]], None] = ...,
+        code_language_callback: Union[
+            Callable[[Incomplete], Union[str, None]], None
+        ] = ...,
         convert: Union[list[str], None] = ...,
         default_title: bool = ...,
         escape_asterisks: bool = ...,
@@ -67,11 +67,6 @@ class MarkdownConverter:
         table_infer_header: bool = ...,
         wrap: bool = ...,
         wrap_width: int = ...,
-    ) -> None:
-        ...
-  
-    def convert(self, html: str) -> str:
-        ...
-
-    def convert_soup(self, soup: Incomplete) -> str:
-        ...
+    ) -> None: ...
+    def convert(self, html: str) -> str: ...
+    def convert_soup(self, soup: Incomplete) -> str: ...
diff --git a/markdownify/main.py b/markdownify/main.py
index ba70671..6c32f28 100755
--- a/markdownify/main.py
+++ b/markdownify/main.py
@@ -3,82 +3,145 @@
 import argparse
 import sys
 
-from markdownify import markdownify, ATX, ATX_CLOSED, UNDERLINED, \
-    SPACES, BACKSLASH, ASTERISK, UNDERSCORE
+from markdownify import (
+    markdownify,
+    ATX,
+    ATX_CLOSED,
+    UNDERLINED,
+    SPACES,
+    BACKSLASH,
+    ASTERISK,
+    UNDERSCORE,
+)
 
 
 def main(argv=sys.argv[1:]):
     parser = argparse.ArgumentParser(
-        prog='markdownify',
-        description='Converts html to markdown.',
+        prog="markdownify",
+        description="Converts html to markdown.",
     )
 
-    parser.add_argument('html', nargs='?', type=argparse.FileType('r'),
-                        default=sys.stdin,
-                        help="The html file to convert. Defaults to STDIN if not "
-                        "provided.")
-    parser.add_argument('-s', '--strip', nargs='*',
-                        help="A list of tags to strip. This option can't be used with "
-                        "the --convert option.")
-    parser.add_argument('-c', '--convert', nargs='*',
-                        help="A list of tags to convert. This option can't be used with "
-                        "the --strip option.")
-    parser.add_argument('-a', '--autolinks', action='store_true',
-                        help="A boolean indicating whether the 'automatic link' style "
-                        "should be used when a 'a' tag's contents match its href.")
-    parser.add_argument('--default-title', action='store_false',
-                        help="A boolean to enable setting the title of a link to its "
-                        "href, if no title is given.")
-    parser.add_argument('--heading-style', default=UNDERLINED,
-                        choices=(ATX, ATX_CLOSED, UNDERLINED),
-                        help="Defines how headings should be converted.")
-    parser.add_argument('-b', '--bullets', default='*+-',
-                        help="A string of bullet styles to use; the bullet will "
-                        "alternate based on nesting level.")
-    parser.add_argument('--strong-em-symbol', default=ASTERISK,
-                        choices=(ASTERISK, UNDERSCORE),
-                        help="Use * or _ to convert strong and italics text"),
-    parser.add_argument('--sub-symbol', default='',
-                        help="Define the chars that surround '<sub>'.")
-    parser.add_argument('--sup-symbol', default='',
-                        help="Define the chars that surround '<sup>'.")
-    parser.add_argument('--newline-style', default=SPACES,
-                        choices=(SPACES, BACKSLASH),
-                        help="Defines the style of <br> conversions: two spaces "
-                        "or backslash at the and of the line thet should break.")
-    parser.add_argument('--code-language', default='',
-                        help="Defines the language that should be assumed for all "
-                        "'<pre>' sections.")
-    parser.add_argument('--no-escape-asterisks', dest='escape_asterisks',
-                        action='store_false',
-                        help="Do not escape '*' to '\\*' in text.")
-    parser.add_argument('--no-escape-underscores', dest='escape_underscores',
-                        action='store_false',
-                        help="Do not escape '_' to '\\_' in text.")
-    parser.add_argument('-i', '--keep-inline-images-in',
-                        default=[],
-                        nargs='*',
-                        help="Images are converted to their alt-text when the images are "
-                        "located inside headlines or table cells. If some inline images "
-                        "should be converted to markdown images instead, this option can "
-                        "be set to a list of parent tags that should be allowed to "
-                        "contain inline images.")
-    parser.add_argument('--table-infer-header', dest='table_infer_header',
-                        action='store_true',
-                        help="When a table has no header row (as indicated by '<thead>' "
-                        "or '<th>'), use the first body row as the header row.")
-    parser.add_argument('-w', '--wrap', action='store_true',
-                        help="Wrap all text paragraphs at --wrap-width characters.")
-    parser.add_argument('--wrap-width', type=int, default=80)
-    parser.add_argument('--bs4-options',
-                        default='html.parser',
-                        help="Specifies the parser that BeautifulSoup should use to parse "
-                             "the HTML markup. Examples include 'html5.parser', 'lxml', and "
-                             "'html5lib'.")
+    parser.add_argument(
+        "html",
+        nargs="?",
+        type=argparse.FileType("r"),
+        default=sys.stdin,
+        help="The html file to convert. Defaults to STDIN if not provided.",
+    )
+    parser.add_argument(
+        "-s",
+        "--strip",
+        nargs="*",
+        help="A list of tags to strip. This option can't be used with "
+        "the --convert option.",
+    )
+    parser.add_argument(
+        "-c",
+        "--convert",
+        nargs="*",
+        help="A list of tags to convert. This option can't be used with "
+        "the --strip option.",
+    )
+    parser.add_argument(
+        "-a",
+        "--autolinks",
+        action="store_true",
+        help="A boolean indicating whether the 'automatic link' style "
+        "should be used when a 'a' tag's contents match its href.",
+    )
+    parser.add_argument(
+        "--default-title",
+        action="store_false",
+        help="A boolean to enable setting the title of a link to its "
+        "href, if no title is given.",
+    )
+    parser.add_argument(
+        "--heading-style",
+        default=UNDERLINED,
+        choices=(ATX, ATX_CLOSED, UNDERLINED),
+        help="Defines how headings should be converted.",
+    )
+    parser.add_argument(
+        "-b",
+        "--bullets",
+        default="*+-",
+        help="A string of bullet styles to use; the bullet will "
+        "alternate based on nesting level.",
+    )
+    (
+        parser.add_argument(
+            "--strong-em-symbol",
+            default=ASTERISK,
+            choices=(ASTERISK, UNDERSCORE),
+            help="Use * or _ to convert strong and italics text",
+        ),
+    )
+    parser.add_argument(
+        "--sub-symbol", default="", help="Define the chars that surround '<sub>'."
+    )
+    parser.add_argument(
+        "--sup-symbol", default="", help="Define the chars that surround '<sup>'."
+    )
+    parser.add_argument(
+        "--newline-style",
+        default=SPACES,
+        choices=(SPACES, BACKSLASH),
+        help="Defines the style of <br> conversions: two spaces "
+        "or backslash at the and of the line thet should break.",
+    )
+    parser.add_argument(
+        "--code-language",
+        default="",
+        help="Defines the language that should be assumed for all '<pre>' sections.",
+    )
+    parser.add_argument(
+        "--no-escape-asterisks",
+        dest="escape_asterisks",
+        action="store_false",
+        help="Do not escape '*' to '\\*' in text.",
+    )
+    parser.add_argument(
+        "--no-escape-underscores",
+        dest="escape_underscores",
+        action="store_false",
+        help="Do not escape '_' to '\\_' in text.",
+    )
+    parser.add_argument(
+        "-i",
+        "--keep-inline-images-in",
+        default=[],
+        nargs="*",
+        help="Images are converted to their alt-text when the images are "
+        "located inside headlines or table cells. If some inline images "
+        "should be converted to markdown images instead, this option can "
+        "be set to a list of parent tags that should be allowed to "
+        "contain inline images.",
+    )
+    parser.add_argument(
+        "--table-infer-header",
+        dest="table_infer_header",
+        action="store_true",
+        help="When a table has no header row (as indicated by '<thead>' "
+        "or '<th>'), use the first body row as the header row.",
+    )
+    parser.add_argument(
+        "-w",
+        "--wrap",
+        action="store_true",
+        help="Wrap all text paragraphs at --wrap-width characters.",
+    )
+    parser.add_argument("--wrap-width", type=int, default=80)
+    parser.add_argument(
+        "--bs4-options",
+        default="html.parser",
+        help="Specifies the parser that BeautifulSoup should use to parse "
+        "the HTML markup. Examples include 'html5.parser', 'lxml', and "
+        "'html5lib'.",
+    )
 
     args = parser.parse_args(argv)
     print(markdownify(**vars(args)))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()

From ad71318fe43905529de59bb4d2347e36552ce4ae Mon Sep 17 00:00:00 2001
From: Shinon <contakt@shinon71.moe>
Date: Mon, 17 Nov 2025 12:28:36 +0800
Subject: [PATCH 2/8] Initial conversion work to selectolax

---
 markdownify/__init__.py | 127 +++++++++++++++++++++++++---------------
 1 file changed, 81 insertions(+), 46 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 7df448b..76b048b 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -1,8 +1,8 @@
-from bs4 import BeautifulSoup, Comment, Doctype, NavigableString, Tag
-from textwrap import fill
 import re
-import six
+from textwrap import fill
+from typing import Any, Callable
 
+from selectolax.lexbor import LexborHTMLParser, LexborNode
 
 # General-purpose regex patterns
 re_convert_heading = re.compile(r"convert_h(\d+)")
@@ -79,7 +79,18 @@ def strip_pre(text):
     return text
 
 
-def chomp(text):
+def find_parent(node: LexborNode | None, node_tag: str):
+    """Finds a parent with the specified tag"""
+    while node:
+        node = node.parent
+        if node is None:
+            break
+        if node.tag == node_tag:
+            return node
+    return node
+
+
+def chomp(text: str):
     """
     If the text in an inline tag like b, a, or em contains a leading or trailing
     space, strip the string and return a space as suffix of prefix, if needed.
@@ -92,7 +103,7 @@ def chomp(text):
     return (prefix, suffix, text)
 
 
-def abstract_inline_conversion(markup_fn):
+def abstract_inline_conversion(markup_fn: Callable):
     """
     This abstracts all simple inline tags like b, em, del, ...
     Returns a function that wraps the chomped text in a pair of the string
@@ -117,17 +128,12 @@ def implementation(self, el, text, parent_tags):
     return implementation
 
 
-def _todict(obj):
+def _todict(obj:Any):
     return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith("_"))
 
 
-def should_remove_whitespace_inside(el):
-    """Return to remove whitespace immediately inside a block-level element."""
-    if not el or not el.name:
-        return False
-    if re_html_heading.match(el.name) is not None:
-        return True
-    return el.name in (
+WHITESPACE_ABLE = set(
+    [
         "p",
         "blockquote",
         "article",
@@ -146,27 +152,49 @@ def should_remove_whitespace_inside(el):
         "tr",
         "td",
         "th",
-    )
+    ]
+)
 
 
-def should_remove_whitespace_outside(el):
+def should_remove_whitespace_inside(el: LexborNode | None):
+    """Return to remove whitespace immediately inside a block-level element."""
+    if not el or not el.tag:
+        return False
+    if re_html_heading.match(el.tag) is not None:
+        return True
+    return el.tag in WHITESPACE_ABLE
+
+
+def should_remove_whitespace_outside(el: LexborNode | None):
     """Return to remove whitespace immediately outside a block-level element."""
-    return should_remove_whitespace_inside(el) or (el and el.name == "pre")
+    return should_remove_whitespace_inside(el) or (el and el.tag == "pre")
 
 
-def _is_block_content_element(el):
+def is_tag(el: LexborNode):
+    """Return True if the lexbor node is an element (a real tag)."""
+    # Non-element nodes are recognised by their pseudo tag *name*, exactly as
+    # the other helpers in this module do (``el.tag == "-text"`` etc.), so the
+    # membership test must use ``el.tag`` rather than ``el.tag_id``.
+    non_element_tags = (None, "-text", "-document", "-comment", "-doctype")
+    return el.tag not in non_element_tags
+
+
+def _is_block_content_element(el: LexborNode | None):
     """
     In a block context, returns:
 
     - True for content elements (tags and non-whitespace text)
     - False for non-content elements (whitespace text, comments, doctypes)
     """
-    if isinstance(el, Tag):
+    if not el:
+        return False
+    if is_tag(el):
         return True
-    elif isinstance(el, (Comment, Doctype)):
+    elif el.tag == "-comment":
         return False  # (subclasses of NavigableString, must test first)
-    elif isinstance(el, NavigableString):
-        return el.strip() != ""
+    elif el.tag == "-text":
+        text = el.text_content if el.text_content else ""
+        return text.strip() != ""
     else:
         return False
 
@@ -189,10 +217,9 @@ def _next_block_content_sibling(el):
     return None
 
 
-class MarkdownConverter(object):
+class MarkdownConverter:
     class DefaultOptions:
         autolinks = True
-        bs4_options = "html.parser"
         bullets = "*+-"  # An iterable of bullet types.
         code_language = ""
         code_language_callback = None
@@ -235,20 +262,26 @@ def __init__(self, **options):
         # Initialize the conversion function cache
         self.convert_fn_cache = {}
 
-    def convert(self, html):
-        soup = BeautifulSoup(html, **self.options["bs4_options"])
+    def convert(self, html: str | bytes) -> str | None:
+        soup = LexborHTMLParser(html)
         return self.convert_soup(soup)
 
-    def convert_soup(self, soup):
-        return self.process_tag(soup, parent_tags=set())
+    def convert_soup(self, soup: LexborHTMLParser | LexborNode) -> str | None:
+        if isinstance(soup, LexborHTMLParser) and soup.root:
+            return self.process_tag(soup.root, parent_tags=set())
+        elif isinstance(soup, LexborNode):
+            return self.process_tag(soup, parent_tags=set())
+        raise NotImplementedError(
+            f"Unexpected type: {type(soup)} passed to convert_soup()."
+        )
 
-    def process_element(self, node, parent_tags=None):
-        if isinstance(node, NavigableString):
+    def process_element(self, node: LexborNode, parent_tags=None):
+        if node.tag and node.tag == "-text":
             return self.process_text(node, parent_tags=parent_tags)
         else:
             return self.process_tag(node, parent_tags=parent_tags)
 
-    def process_tag(self, node, parent_tags=None):
+    def process_tag(self, node: LexborNode, parent_tags=None):
         # For the top-level element, initialize the parent context with an empty set.
         if parent_tags is None:
             parent_tags = set()
@@ -257,26 +290,24 @@ def process_tag(self, node, parent_tags=None):
         # adjacent to the inner/outer boundaries of block elements.
         should_remove_inside = should_remove_whitespace_inside(node)
 
-        def _can_ignore(el):
-            if isinstance(el, Tag):
+        def _can_ignore(el: LexborNode):
+            if is_tag(el):
                 # Tags are always processed.
                 return False
-            elif isinstance(el, (Comment, Doctype)):
+            elif el.tag in ["-comment", "-doctype"]:
                 # Comment and Doctype elements are always ignored.
                 # (subclasses of NavigableString, must test first)
                 return True
-            elif isinstance(el, NavigableString):
-                if six.text_type(el).strip() != "":
+            elif el.tag == "-text":
+                if el.text_content and el.text_content.strip():
                     # Non-whitespace text nodes are always processed.
                     return False
-                elif should_remove_inside and (
-                    not el.previous_sibling or not el.next_sibling
-                ):
+                elif should_remove_inside and (not el.prev or not el.next):
                     # Inside block elements (excluding <pre>), ignore adjacent whitespace elements.
                     return True
                 elif should_remove_whitespace_outside(
-                    el.previous_sibling
-                ) or should_remove_whitespace_outside(el.next_sibling):
+                    el.prev
+                ) or should_remove_whitespace_outside(el.next):
                     # Outside block elements (including <pre>), ignore adjacent whitespace elements.
                     return True
                 else:
@@ -286,22 +317,26 @@ def _can_ignore(el):
             else:
                 raise ValueError("Unexpected element type: %s" % type(el))
 
-        children_to_convert = [el for el in node.children if not _can_ignore(el)]
+        children_to_convert = [
+            el
+            for el in node.iter(include_text=True)
+            if not _can_ignore(el) and el != node
+        ]
 
         # Create a copy of this tag's parent context, then update it to include this tag
         # to propagate down into the children.
         parent_tags_for_children = set(parent_tags)
-        parent_tags_for_children.add(node.name)
+        parent_tags_for_children.add(node.tag)
 
         # if this tag is a heading or table cell, add an '_inline' parent pseudo-tag
         if (
-            re_html_heading.match(node.name) is not None  # headings
-            or node.name in {"td", "th"}  # table cells
+            (node.tag and re_html_heading.match(node.tag) is not None)  # headings
+            or node.tag in {"td", "th"}  # table cells
         ):
             parent_tags_for_children.add("_inline")
 
         # if this tag is a preformatted element, add a '_noformat' parent pseudo-tag
-        if node.name in {"pre", "code", "kbd", "samp"}:
+        if node.tag in {"pre", "code", "kbd", "samp"}:
             parent_tags_for_children.add("_noformat")
 
         # Convert the children elements into a list of result strings.
@@ -314,7 +349,7 @@ def _can_ignore(el):
         child_strings = [s for s in child_strings if s]
 
         # Collapse newlines at child element boundaries, if needed.
-        if node.name == "pre" or node.find_parent("pre"):
+        if node.tag == "pre" or find_parent(node, "pre"):
             # Inside <pre> blocks, do not collapse newlines.
             pass
         else:

From 5adc61d87b4a5c7f0026d4382ab7d6250a0103fa Mon Sep 17 00:00:00 2001
From: Shinon <contakt@shinon71.moe>
Date: Mon, 17 Nov 2025 12:45:39 +0800
Subject: [PATCH 3/8] Finish conversion to selectolax & remove bs4 options

---
 markdownify/__init__.py  | 216 +++++++++++++++++++++++----------------
 markdownify/__init__.pyi |   2 -
 2 files changed, 129 insertions(+), 89 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 76b048b..bfbdbad 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -10,7 +10,6 @@
 re_whitespace = re.compile(r"[\t ]+")
 re_all_whitespace = re.compile(r"[\t \r\n]+")
 re_newline_whitespace = re.compile(r"[\t \r\n]*[\r\n][\t \r\n]*")
-re_html_heading = re.compile(r"h(\d+)")
 re_pre_lstrip1 = re.compile(r"^ *\n")
 re_pre_rstrip1 = re.compile(r"\n *$")
 re_pre_lstrip = re.compile(r"^[ \n]*\n")
@@ -65,6 +64,21 @@
 STRIP_ONE = "strip_one"
 
 
+def is_header_tag(tag_name: str):
+    """Return True if the tag is a header (h1, h2, h3 ...); "" is not one."""
+    tag_name = tag_name.lower()
+    # XXX: isdigit() is the fastest, but can be inaccurate
+    return tag_name[:1] == "h" and tag_name[1:].isdigit()
+
+
+def find_previous_siblings(el: LexborNode | None, tag: str):
+    """Finds a previous element with specified tag"""
+    while el:
+        el = el.prev
+        if el and el.tag == tag:
+            yield el
+
+
 def strip1_pre(text):
     """Strip one leading and trailing newline from a <pre> string."""
     text = re_pre_lstrip1.sub("", text)
@@ -79,15 +93,15 @@ def strip_pre(text):
     return text
 
 
-def find_parent(node: LexborNode | None, node_tag: str):
+def find_parent(el: LexborNode | None, node_tag: str):
     """Finds a parent with the specified tag"""
-    while node:
-        node = node.parent
-        if node is None:
+    while el:
+        el = el.parent
+        if el is None:
             break
-        if node.tag == node_tag:
-            return node
-    return node
+        if el.tag == node_tag:
+            return el
+    return el
 
 
 def chomp(text: str):
@@ -128,7 +142,7 @@ def implementation(self, el, text, parent_tags):
     return implementation
 
 
-def _todict(obj:Any):
+def _todict(obj: Any):
     return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith("_"))
 
 
@@ -160,7 +174,7 @@ def should_remove_whitespace_inside(el: LexborNode | None):
     """Return to remove whitespace immediately inside a block-level element."""
     if not el or not el.tag:
         return False
-    if re_html_heading.match(el.tag) is not None:
+    if is_header_tag(el.tag):
         return True
     return el.tag in WHITESPACE_ABLE
 
@@ -255,9 +269,6 @@ def __init__(self, **options):
                 "You may specify either tags to strip or tags to convert, but not both."
             )
 
-        # If a string or list is passed to bs4_options, assume it is a 'features' specification
-        if not isinstance(self.options["bs4_options"], dict):
-            self.options["bs4_options"] = {"features": self.options["bs4_options"]}
 
         # Initialize the conversion function cache
         self.convert_fn_cache = {}
@@ -275,20 +286,22 @@ def convert_soup(self, soup: LexborHTMLParser | LexborNode) -> str | None:
             f"Unexpected type: {type(soup)} passed to convert_soup()."
         )
 
-    def process_element(self, node: LexborNode, parent_tags=None):
-        if node.tag and node.tag == "-text":
-            return self.process_text(node, parent_tags=parent_tags)
+    def process_element(self, el: LexborNode, parent_tags=None):
+        if el.tag and el.tag == "-text":
+            return self.process_text(el, parent_tags=parent_tags)
         else:
-            return self.process_tag(node, parent_tags=parent_tags)
+            return self.process_tag(el, parent_tags=parent_tags)
 
-    def process_tag(self, node: LexborNode, parent_tags=None):
+    def process_tag(self, el: LexborNode, parent_tags=None):
         # For the top-level element, initialize the parent context with an empty set.
         if parent_tags is None:
             parent_tags = set()
 
+        node_tag = el.tag
+
         # Collect child elements to process, ignoring whitespace-only text elements
         # adjacent to the inner/outer boundaries of block elements.
-        should_remove_inside = should_remove_whitespace_inside(node)
+        should_remove_inside = should_remove_whitespace_inside(el)
 
         def _can_ignore(el: LexborNode):
             if is_tag(el):
@@ -318,25 +331,23 @@ def _can_ignore(el: LexborNode):
                 raise ValueError("Unexpected element type: %s" % type(el))
 
         children_to_convert = [
-            el
-            for el in node.iter(include_text=True)
-            if not _can_ignore(el) and el != node
+            el for el in el.iter(include_text=True) if not _can_ignore(el) and el != el
         ]
 
         # Create a copy of this tag's parent context, then update it to include this tag
         # to propagate down into the children.
         parent_tags_for_children = set(parent_tags)
-        parent_tags_for_children.add(node.tag)
+        parent_tags_for_children.add(el.tag)
 
         # if this tag is a heading or table cell, add an '_inline' parent pseudo-tag
         if (
-            (node.tag and re_html_heading.match(node.tag) is not None)  # headings
-            or node.tag in {"td", "th"}  # table cells
+            (node_tag and is_header_tag(node_tag) is not None)  # headings
+            or node_tag in {"td", "th"}  # table cells
         ):
             parent_tags_for_children.add("_inline")
 
         # if this tag is a preformatted element, add a '_noformat' parent pseudo-tag
-        if node.tag in {"pre", "code", "kbd", "samp"}:
+        if node_tag in {"pre", "code", "kbd", "samp"}:
             parent_tags_for_children.add("_noformat")
 
         # Convert the children elements into a list of result strings.
@@ -349,7 +360,7 @@ def _can_ignore(el: LexborNode):
         child_strings = [s for s in child_strings if s]
 
         # Collapse newlines at child element boundaries, if needed.
-        if node.tag == "pre" or find_parent(node, "pre"):
+        if node_tag == "pre" or find_parent(el, "pre"):
             # Inside <pre> blocks, do not collapse newlines.
             pass
         else:
@@ -378,14 +389,17 @@ def _can_ignore(el: LexborNode):
         # Join all child text strings into a single string.
         text = "".join(child_strings)
 
+        # Ensure node.tag is valid.
+        if el.tag is None:
+            raise NotImplementedError("Expected tag to be valid. Got None.")
         # apply this tag's final conversion function
-        convert_fn = self.get_conv_fn_cached(node.name)
+        convert_fn = self.get_conv_fn_cached(el.tag)
         if convert_fn is not None:
-            text = convert_fn(node, text, parent_tags=parent_tags)
+            text = convert_fn(el, text, parent_tags=parent_tags)
 
         return text
 
-    def convert__document_(self, el, text, parent_tags):
+    def convert__document_(self, el: LexborNode, text, parent_tags):
         """Final document-level formatting for BeautifulSoup object (node.name == "[document]")"""
         if self.options["strip_document"] == LSTRIP:
             text = text.lstrip("\n")  # remove leading separation newlines
@@ -402,12 +416,12 @@ def convert__document_(self, el, text, parent_tags):
 
         return text
 
-    def process_text(self, el, parent_tags=None):
+    def process_text(self, el: LexborNode, parent_tags=None):
         # For the top-level element, initialize the parent context with an empty set.
         if parent_tags is None:
             parent_tags = set()
 
-        text = six.text_type(el) or ""
+        text = el.text_content or ""
 
         # normalize whitespace if we're not inside a preformatted element
         if "pre" not in parent_tags:
@@ -424,18 +438,18 @@ def process_text(self, el, parent_tags=None):
         # remove leading whitespace at the start or just after a
         # block-level element; remove traliing whitespace at the end
         # or just before a block-level element.
-        if should_remove_whitespace_outside(el.previous_sibling) or (
-            should_remove_whitespace_inside(el.parent) and not el.previous_sibling
+        if should_remove_whitespace_outside(el.prev) or (
+            should_remove_whitespace_inside(el.parent) and not el.prev
         ):
             text = text.lstrip(" \t\r\n")
-        if should_remove_whitespace_outside(el.next_sibling) or (
-            should_remove_whitespace_inside(el.parent) and not el.next_sibling
+        if should_remove_whitespace_outside(el.next) or (
+            should_remove_whitespace_inside(el.parent) and not el.next
         ):
             text = text.rstrip()
 
         return text
 
-    def get_conv_fn_cached(self, tag_name):
+    def get_conv_fn_cached(self, tag_name: str):
         """Given a tag name, return the conversion function using the cache."""
         # If conversion function is not in cache, add it
         if tag_name not in self.convert_fn_cache:
@@ -444,7 +458,7 @@ def get_conv_fn_cached(self, tag_name):
         # Return the cached entry
         return self.convert_fn_cache[tag_name]
 
-    def get_conv_fn(self, tag_name):
+    def get_conv_fn(self, tag_name: str):
         """Given a tag name, find and return the conversion function."""
         tag_name = tag_name.lower()
 
@@ -459,9 +473,9 @@ def get_conv_fn(self, tag_name):
             return convert_fn
 
         # If tag is any heading, handle with convert_hN() function
-        match = re_html_heading.match(tag_name)
+        match = is_header_tag(tag_name)
         if match:
-            n = int(match.group(1))  # get value of N from <hN>
+            n = int(tag_name[1:])  # get value of N from <hN>
             return lambda el, text, parent_tags: self.convert_hN(
                 n, el, text, parent_tags
             )
@@ -469,7 +483,7 @@ def get_conv_fn(self, tag_name):
         # No conversion function was found
         return None
 
-    def should_convert_tag(self, tag):
+    def should_convert_tag(self, tag: str):
         """Given a tag name, return whether to convert based on strip/convert options."""
         strip = self.options["strip"]
         convert = self.options["convert"]
@@ -499,14 +513,15 @@ def underline(self, text, pad_char):
         text = (text or "").rstrip()
         return "\n\n%s\n%s\n\n" % (text, pad_char * len(text)) if text else ""
 
-    def convert_a(self, el, text, parent_tags):
+    def convert_a(self, el: LexborNode, text, parent_tags):
         if "_noformat" in parent_tags:
             return text
         prefix, suffix, text = chomp(text)
         if not text:
             return ""
-        href = el.get("href")
-        title = el.get("title")
+        attributes = el.attributes
+        href = attributes.get("href")
+        title = attributes.get("title")
         # For the replacement see #29: text nodes underscores are escaped
         if (
             self.options["autolinks"]
@@ -529,7 +544,7 @@ def convert_a(self, el, text, parent_tags):
         lambda self: 2 * self.options["strong_em_symbol"]
     )
 
-    def convert_blockquote(self, el, text, parent_tags):
+    def convert_blockquote(self, el: LexborNode, text, parent_tags):
         # handle some early-exit scenarios
         text = (text or "").strip(" \t\r\n")
         if "_inline" in parent_tags:
@@ -546,7 +561,7 @@ def _indent_for_blockquote(match):
 
         return "\n" + text + "\n\n"
 
-    def convert_br(self, el, text, parent_tags):
+    def convert_br(self, el: LexborNode, text, parent_tags):
         if "_inline" in parent_tags:
             return " "
 
@@ -555,7 +570,7 @@ def convert_br(self, el, text, parent_tags):
         else:
             return "  \n"
 
-    def convert_code(self, el, text, parent_tags):
+    def convert_code(self, el: LexborNode, text, parent_tags):
         if "_noformat" in parent_tags:
             return text
 
@@ -657,31 +672,41 @@ def convert_hr(self, el, text, parent_tags):
 
     convert_i = convert_em
 
-    def convert_img(self, el, text, parent_tags):
-        alt = el.attrs.get("alt", None) or ""
-        src = el.attrs.get("src", None) or ""
-        title = el.attrs.get("title", None) or ""
+    def convert_img(self, el: LexborNode, text, parent_tags):
+        if not el.parent:
+            raise NotImplementedError(
+                "img element does not have a children. Potentially malformed?"
+            )
+        attrs = el.attributes
+        alt = attrs.get("alt", None) or ""
+        src = attrs.get("src", None) or ""
+        title = attrs.get("title", None) or ""
         title_part = ' "%s"' % title.replace('"', r"\"") if title else ""
         if (
             "_inline" in parent_tags
-            and el.parent.name not in self.options["keep_inline_images_in"]
+            and el.parent.tag not in self.options["keep_inline_images_in"]
         ):
             return alt
 
         return "![%s](%s%s)" % (alt, src, title_part)
 
-    def convert_video(self, el, text, parent_tags):
+    def convert_video(self, el: LexborNode, text, parent_tags):
+        if not el.parent:
+            raise NotImplementedError(
+                "video element does not have a children. Potentially malformed?"
+            )
         if (
             "_inline" in parent_tags
-            and el.parent.name not in self.options["keep_inline_images_in"]
+            and el.parent.tag not in self.options["keep_inline_images_in"]
         ):
             return text
-        src = el.attrs.get("src", None) or ""
+        attrs = el.attributes
+        src = attrs.get("src", None) or ""
         if not src:
-            sources = el.find_all("source", attrs={"src": True})
+            sources = el.css("source[src]")
             if sources:
-                src = sources[0].attrs.get("src", None) or ""
-        poster = el.attrs.get("poster", None) or ""
+                src = sources[0].attributes.get("src", None) or ""
+        poster = attrs.get("poster", None) or ""
         if src and poster:
             return "[![%s](%s)](%s)" % (text, poster, src)
         if src:
@@ -690,7 +715,7 @@ def convert_video(self, el, text, parent_tags):
             return "![%s](%s)" % (text, poster)
         return text
 
-    def convert_list(self, el, text, parent_tags):
+    def convert_list(self, el: LexborNode, text, parent_tags):
         # Converting a list to inline is undefined.
         # Ignoring inline conversion parents for list.
 
@@ -706,7 +731,11 @@ def convert_list(self, el, text, parent_tags):
     convert_ul = convert_list
     convert_ol = convert_list
 
-    def convert_li(self, el, text, parent_tags):
+    def convert_li(self, el: LexborNode, text, parent_tags):
+        if not el.parent:
+            raise NotImplementedError(
+                "li element does not have a children. Potentially malformed?"
+            )
         # handle some early-exit scenarios
         text = (text or "").strip()
         if not text:
@@ -714,16 +743,17 @@ def convert_li(self, el, text, parent_tags):
 
         # determine list item bullet character to use
         parent = el.parent
-        if parent is not None and parent.name == "ol":
-            if parent.get("start") and str(parent.get("start")).isnumeric():
-                start = int(parent.get("start"))
+        if parent is not None and parent.tag == "ol":
+            start_attribute = parent.attributes.get("start")
+            if start_attribute and str(start_attribute).isnumeric():
+                start = int(start_attribute)
             else:
                 start = 1
-            bullet = "%s." % (start + len(el.find_previous_siblings("li")))
+            bullet = "%s." % (start + len(list(find_previous_siblings(el, "li"))))
         else:
             depth = -1
             while el:
-                if el.name == "ul":
+                if el.tag == "ul":
                     depth += 1
                 el = el.parent
             bullets = self.options["bullets"]
@@ -809,46 +839,58 @@ def convert_style(self, el, text, parent_tags):
 
     convert_sup = abstract_inline_conversion(lambda self: self.options["sup_symbol"])
 
-    def convert_table(self, el, text, parent_tags):
+    def convert_table(self, el: LexborNode, text, parent_tags):
         return "\n\n" + text.strip() + "\n\n"
 
-    def convert_caption(self, el, text, parent_tags):
+    def convert_caption(self, el: LexborNode, text, parent_tags):
         return text.strip() + "\n\n"
 
-    def convert_figcaption(self, el, text, parent_tags):
+    def convert_figcaption(self, el: LexborNode, text, parent_tags):
         return "\n\n" + text.strip() + "\n\n"
 
-    def convert_td(self, el, text, parent_tags):
+    def convert_td(self, el: LexborNode, text, parent_tags):
         colspan = 1
-        if "colspan" in el.attrs and el["colspan"].isdigit():
-            colspan = max(1, min(1000, int(el["colspan"])))
+        el_colspan = el.attributes.get("colspan")
+        el_colspan = int(el_colspan) if el_colspan and el_colspan.isdigit() else 0
+        if el_colspan:
+            colspan = max(1, min(1000, el_colspan))
         return " " + text.strip().replace("\n", " ") + " |" * colspan
 
-    def convert_th(self, el, text, parent_tags):
+    def convert_th(self, el: LexborNode, text, parent_tags):
         colspan = 1
-        if "colspan" in el.attrs and el["colspan"].isdigit():
-            colspan = max(1, min(1000, int(el["colspan"])))
+        el_colspan = el.attributes.get("colspan")
+        el_colspan = int(el_colspan) if el_colspan and el_colspan.isdigit() else 0
+        if el_colspan:
+            colspan = max(1, min(1000, el_colspan))
         return " " + text.strip().replace("\n", " ") + " |" * colspan
 
-    def convert_tr(self, el, text, parent_tags):
-        cells = el.find_all(["td", "th"])
-        is_first_row = el.find_previous_sibling() is None
-        is_headrow = all([cell.name == "th" for cell in cells]) or (
-            el.parent.name == "thead"
+    def convert_tr(self, el: LexborNode, text, parent_tags):
+        if not el.parent or not el.parent.parent:
+            raise NotImplementedError(
+                "Found table row with no parent or sub-parent. Malformed document?"
+            )
+        cells = el.css("td,th")
+        is_first_row = el.prev is None
+        is_headrow = all([cell.tag == "th" for cell in cells]) or (
+            el.parent.tag == "thead"
             # avoid multiple tr in thead
-            and len(el.parent.find_all("tr")) == 1
+            and len(el.parent.css("tr")) == 1
         )
-        is_head_row_missing = (is_first_row and not el.parent.name == "tbody") or (
+        is_head_row_missing = (is_first_row and not el.parent.tag == "tbody") or (
             is_first_row
-            and el.parent.name == "tbody"
-            and len(el.parent.parent.find_all(["thead"])) < 1
+            and el.parent.tag == "tbody"
+            and len(el.parent.parent.css("thead")) < 1
         )
         overline = ""
         underline = ""
         full_colspan = 0
         for cell in cells:
-            if "colspan" in cell.attrs and cell["colspan"].isdigit():
-                full_colspan += max(1, min(1000, int(cell["colspan"])))
+            cell_colspan = cell.attributes.get("colspan")
+            cell_colspan = (
+                int(cell_colspan) if cell_colspan and cell_colspan.isdigit() else 0
+            )
+            if cell_colspan:
+                full_colspan += max(1, min(1000, cell_colspan))
             else:
                 full_colspan += 1
         if (
@@ -862,8 +904,8 @@ def convert_tr(self, el, text, parent_tags):
         elif (is_head_row_missing and not self.options["table_infer_header"]) or (
             is_first_row
             and (
-                el.parent.name == "table"
-                or (el.parent.name == "tbody" and not el.parent.find_previous_sibling())
+                el.parent.tag == "table"
+                or (el.parent.tag == "tbody" and not el.parent.prev)
             )
         ):
             # headline is missing and header inference is disabled or:
diff --git a/markdownify/__init__.pyi b/markdownify/__init__.pyi
index ccb587f..a9b8674 100644
--- a/markdownify/__init__.pyi
+++ b/markdownify/__init__.pyi
@@ -17,7 +17,6 @@ STRIP_ONE: str
 def markdownify(
     html: str,
     autolinks: bool = ...,
-    bs4_options: str = ...,
     bullets: str = ...,
     code_language: str = ...,
     code_language_callback: Union[Callable[[Incomplete], Union[str, None]], None] = ...,
@@ -44,7 +43,6 @@ class MarkdownConverter:
     def __init__(
         self,
         autolinks: bool = ...,
-        bs4_options: str = ...,
         bullets: str = ...,
         code_language: str = ...,
         code_language_callback: Union[

From d72c80ab70981e347773db9178a62cc15bcf89bd Mon Sep 17 00:00:00 2001
From: Shinon <contakt@shinon71.moe>
Date: Mon, 17 Nov 2025 12:46:30 +0800
Subject: [PATCH 4/8] Increment version, fix dependencies

---
 pyproject.toml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3df85eb..e8268bb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "markdownify"
-version = "1.2.2"
+version = "2.0.0"
 authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}]
 description = "Convert HTML to markdown."
 readme = "README.rst"
@@ -23,8 +23,7 @@ classifiers = [
     "Topic :: Utilities",
 ]
 dependencies = [
-    "beautifulsoup4>=4.9,<5",
-    "six>=1.15,<2"
+    "selectolax>0.4"
 ]
 
 [project.urls]

From 5fc94a8d8f5098cc9b07d2ba74b4ed46d675e8be Mon Sep 17 00:00:00 2001
From: Shinon <contakt@shinon71.moe>
Date: Mon, 17 Nov 2025 13:18:46 +0800
Subject: [PATCH 5/8] Function Typing

---
 markdownify/__init__.py | 85 ++++++++++++++++++++++++-----------------
 1 file changed, 50 insertions(+), 35 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index bfbdbad..c2749a3 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -79,14 +79,14 @@ def find_previous_siblings(el: LexborNode | None, tag: str):
             yield el
 
 
-def strip1_pre(text):
+def strip1_pre(text: str):
     """Strip one leading and trailing newline from a <pre> string."""
     text = re_pre_lstrip1.sub("", text)
     text = re_pre_rstrip1.sub("", text)
     return text
 
 
-def strip_pre(text):
+def strip_pre(text: str):
     """Strip all leading and trailing newlines from a <pre> string."""
     text = re_pre_lstrip.sub("", text)
     text = re_pre_rstrip.sub("", text)
@@ -117,7 +117,7 @@ def chomp(text: str):
     return (prefix, suffix, text)
 
 
-def abstract_inline_conversion(markup_fn: Callable):
+def abstract_inline_conversion(markup_fn: Callable[["MarkdownConverter"], str]):
     """
     This abstracts all simple inline tags like b, em, del, ...
     Returns a function that wraps the chomped text in a pair of the string
@@ -258,7 +258,7 @@ class DefaultOptions:
     class Options(DefaultOptions):
         pass
 
-    def __init__(self, **options):
+    def __init__(self, **options: dict[str, Any]):
         # Create an options dictionary. Use DefaultOptions as a base so that
         # it doesn't have to be extended.
         self.options = _todict(self.DefaultOptions)
@@ -269,7 +269,6 @@ def __init__(self, **options):
                 "You may specify either tags to strip or tags to convert, but not both."
             )
 
-
         # Initialize the conversion function cache
         self.convert_fn_cache = {}
 
@@ -286,13 +285,13 @@ def convert_soup(self, soup: LexborHTMLParser | LexborNode) -> str | None:
             f"Unexpected type: {type(soup)} passed to convert_soup()."
         )
 
-    def process_element(self, el: LexborNode, parent_tags=None):
+    def process_element(self, el: LexborNode, parent_tags: set[str] | None = None):
         if el.tag and el.tag == "-text":
             return self.process_text(el, parent_tags=parent_tags)
         else:
             return self.process_tag(el, parent_tags=parent_tags)
 
-    def process_tag(self, el: LexborNode, parent_tags=None):
+    def process_tag(self, el: LexborNode, parent_tags: set[str] | None = None):
         # For the top-level element, initialize the parent context with an empty set.
         if parent_tags is None:
             parent_tags = set()
@@ -399,7 +398,7 @@ def _can_ignore(el: LexborNode):
 
         return text
 
-    def convert__document_(self, el: LexborNode, text, parent_tags):
+    def convert__document_(self, el: LexborNode, text: str, parent_tags: set[str]):
         """Final document-level formatting for BeautifulSoup object (node.name == "[document]")"""
         if self.options["strip_document"] == LSTRIP:
             text = text.lstrip("\n")  # remove leading separation newlines
@@ -416,7 +415,7 @@ def convert__document_(self, el: LexborNode, text, parent_tags):
 
         return text
 
-    def process_text(self, el: LexborNode, parent_tags=None):
+    def process_text(self, el: LexborNode, parent_tags: set[str] | None = None):
         # For the top-level element, initialize the parent context with an empty set.
         if parent_tags is None:
             parent_tags = set()
@@ -494,7 +493,7 @@ def should_convert_tag(self, tag: str):
         else:
             return True
 
-    def escape(self, text, parent_tags):
+    def escape(self, text: str, parent_tags: set[str]):
         if not text:
             return ""
         if self.options["escape_misc"]:
@@ -509,11 +508,11 @@ def escape(self, text, parent_tags):
             text = text.replace("_", r"\_")
         return text
 
-    def underline(self, text, pad_char):
+    def underline(self, text: str, pad_char: str):
         text = (text or "").rstrip()
         return "\n\n%s\n%s\n\n" % (text, pad_char * len(text)) if text else ""
 
-    def convert_a(self, el: LexborNode, text, parent_tags):
+    def convert_a(self, el: LexborNode, text: str, parent_tags: set[str]):
         if "_noformat" in parent_tags:
             return text
         prefix, suffix, text = chomp(text)
@@ -544,7 +543,7 @@ def convert_a(self, el: LexborNode, text, parent_tags):
         lambda self: 2 * self.options["strong_em_symbol"]
     )
 
-    def convert_blockquote(self, el: LexborNode, text, parent_tags):
+    def convert_blockquote(self, el: LexborNode, text: str, parent_tags: set[str]):
         # handle some early-exit scenarios
         text = (text or "").strip(" \t\r\n")
         if "_inline" in parent_tags:
@@ -561,7 +560,7 @@ def _indent_for_blockquote(match):
 
         return "\n" + text + "\n\n"
 
-    def convert_br(self, el: LexborNode, text, parent_tags):
+    def convert_br(self, el: LexborNode, text: str, parent_tags: set[str]):
         if "_inline" in parent_tags:
             return " "
 
@@ -570,7 +569,7 @@ def convert_br(self, el: LexborNode, text, parent_tags):
         else:
             return "  \n"
 
-    def convert_code(self, el: LexborNode, text, parent_tags):
+    def convert_code(self, el: LexborNode, text: str, parent_tags: set[str]):
         if "_noformat" in parent_tags:
             return text
 
@@ -594,7 +593,7 @@ def convert_code(self, el: LexborNode, text, parent_tags):
 
     convert_del = abstract_inline_conversion(lambda self: "~~")
 
-    def convert_div(self, el, text, parent_tags):
+    def convert_div(self, el: LexborNode, text: str, parent_tags: set[str]):
         if "_inline" in parent_tags:
             return " " + text.strip() + " "
         text = text.strip()
@@ -610,7 +609,7 @@ def convert_div(self, el, text, parent_tags):
 
     convert_kbd = convert_code
 
-    def convert_dd(self, el, text, parent_tags):
+    def convert_dd(self, el: LexborNode, text: str, parent_tags: set[str]):
         text = (text or "").strip()
         if "_inline" in parent_tags:
             return " " + text + " "
@@ -634,7 +633,7 @@ def _indent_for_dd(match):
     #   https://michelf.ca/projects/php-markdown/extra/#def-list
     convert_dl = convert_div
 
-    def convert_dt(self, el, text, parent_tags):
+    def convert_dt(self, el: LexborNode, text: str, parent_tags: set[str]):
         # remove newlines from term text
         text = (text or "").strip()
         text = re_all_whitespace.sub(" ", text)
@@ -648,7 +647,7 @@ def convert_dt(self, el, text, parent_tags):
 
         return "\n\n%s\n" % text
 
-    def convert_hN(self, n, el, text, parent_tags):
+    def convert_hN(self, n: int, el: LexborNode, text: str, parent_tags: set[str]):
         # convert_hN() converts <hN> tags, where N is any integer
         if "_inline" in parent_tags:
             return text
@@ -667,12 +666,12 @@ def convert_hN(self, n, el, text, parent_tags):
             return "\n\n%s %s %s\n\n" % (hashes, text, hashes)
         return "\n\n%s %s\n\n" % (hashes, text)
 
-    def convert_hr(self, el, text, parent_tags):
+    def convert_hr(self, el: LexborNode, text: str, parent_tags: set[str]):
         return "\n\n---\n\n"
 
     convert_i = convert_em
 
-    def convert_img(self, el: LexborNode, text, parent_tags):
+    def convert_img(self, el: LexborNode, text: str, parent_tags: set[str]):
         if not el.parent:
             raise NotImplementedError(
                 "img element does not have a children. Potentially malformed?"
@@ -690,7 +689,7 @@ def convert_img(self, el: LexborNode, text, parent_tags):
 
         return "![%s](%s%s)" % (alt, src, title_part)
 
-    def convert_video(self, el: LexborNode, text, parent_tags):
+    def convert_video(self, el: LexborNode, text: str, parent_tags: set[str]):
         if not el.parent:
             raise NotImplementedError(
                 "video element does not have a children. Potentially malformed?"
@@ -715,7 +714,7 @@ def convert_video(self, el: LexborNode, text, parent_tags):
             return "![%s](%s)" % (text, poster)
         return text
 
-    def convert_list(self, el: LexborNode, text, parent_tags):
+    def convert_list(self, el: LexborNode, text: str, parent_tags: set[str]):
         # Converting a list to inline is undefined.
         # Ignoring inline conversion parents for list.
 
@@ -731,7 +730,7 @@ def convert_list(self, el: LexborNode, text, parent_tags):
     convert_ul = convert_list
     convert_ol = convert_list
 
-    def convert_li(self, el: LexborNode, text, parent_tags):
+    def convert_li(self, el: LexborNode, text: str, parent_tags: set[str]):
         if not el.parent:
             raise NotImplementedError(
                 "li element does not have a children. Potentially malformed?"
@@ -774,7 +773,7 @@ def _indent_for_li(match):
 
         return "%s\n" % text
 
-    def convert_p(self, el, text, parent_tags):
+    def convert_p(self, el: LexborNode, text: str, parent_tags: set[str]):
         if "_inline" in parent_tags:
             return " " + text.strip(" \t\r\n") + " "
         text = text.strip(" \t\r\n")
@@ -799,7 +798,7 @@ def convert_p(self, el, text, parent_tags):
                 text = "\n".join(new_lines)
         return "\n\n%s\n\n" % text if text else ""
 
-    def convert_pre(self, el, text, parent_tags):
+    def convert_pre(self, el: LexborNode, text: str, parent_tags: set[str]):
         if not text:
             return ""
         code_language = self.options["code_language"]
@@ -820,13 +819,17 @@ def convert_pre(self, el, text, parent_tags):
 
         return "\n\n```%s\n%s\n```\n\n" % (code_language, text)
 
-    def convert_q(self, el, text, parent_tags):
+    def convert_q(self, el: LexborNode, text: str, parent_tags: set[str] | None = None):
         return '"' + text + '"'
 
-    def convert_script(self, el, text, parent_tags):
+    def convert_script(
+        self, el: LexborNode, text: str, parent_tags: set[str] | None = None
+    ):
         return ""
 
-    def convert_style(self, el, text, parent_tags):
+    def convert_style(
+        self, el: LexborNode, text: str, parent_tags: set[str] | None = None
+    ):
         return ""
 
     convert_s = convert_del
@@ -839,16 +842,24 @@ def convert_style(self, el, text, parent_tags):
 
     convert_sup = abstract_inline_conversion(lambda self: self.options["sup_symbol"])
 
-    def convert_table(self, el: LexborNode, text, parent_tags):
+    def convert_table(
+        self, el: LexborNode, text: str, parent_tags: set[str] | None = None
+    ):
         return "\n\n" + text.strip() + "\n\n"
 
-    def convert_caption(self, el: LexborNode, text, parent_tags):
+    def convert_caption(
+        self, el: LexborNode, text: str, parent_tags: set[str] | None = None
+    ):
         return text.strip() + "\n\n"
 
-    def convert_figcaption(self, el: LexborNode, text, parent_tags):
+    def convert_figcaption(
+        self, el: LexborNode, text: str, parent_tags: set[str] | None = None
+    ):
         return "\n\n" + text.strip() + "\n\n"
 
-    def convert_td(self, el: LexborNode, text, parent_tags):
+    def convert_td(
+        self, el: LexborNode, text: str, parent_tags: set[str] | None = None
+    ):
         colspan = 1
         el_colspan = el.attributes.get("colspan")
         el_colspan = int(el_colspan) if el_colspan and el_colspan.isdigit() else 0
@@ -856,7 +867,9 @@ def convert_td(self, el: LexborNode, text, parent_tags):
             colspan = max(1, min(1000, el_colspan))
         return " " + text.strip().replace("\n", " ") + " |" * colspan
 
-    def convert_th(self, el: LexborNode, text, parent_tags):
+    def convert_th(
+        self, el: LexborNode, text: str, parent_tags: set[str] | None = None
+    ):
         colspan = 1
         el_colspan = el.attributes.get("colspan")
         el_colspan = int(el_colspan) if el_colspan and el_colspan.isdigit() else 0
@@ -864,7 +877,9 @@ def convert_th(self, el: LexborNode, text, parent_tags):
             colspan = max(1, min(1000, el_colspan))
         return " " + text.strip().replace("\n", " ") + " |" * colspan
 
-    def convert_tr(self, el: LexborNode, text, parent_tags):
+    def convert_tr(
+        self, el: LexborNode, text: str, parent_tags: set[str] | None = None
+    ):
         if not el.parent or not el.parent.parent:
             raise NotImplementedError(
                 "Found table row with no parent or sub-parent. Malformed document?"

From b2922b20c7b1961ad832dbaad055920d4cff9b0a Mon Sep 17 00:00:00 2001
From: Shinon <contakt@shinon71.moe>
Date: Mon, 17 Nov 2025 13:19:49 +0800
Subject: [PATCH 6/8] removed unused regex

---
 markdownify/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index c2749a3..75058ec 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -5,7 +5,6 @@
 from selectolax.lexbor import LexborHTMLParser, LexborNode
 
 # General-purpose regex patterns
-re_convert_heading = re.compile(r"convert_h(\d+)")
 re_line_with_content = re.compile(r"^(.*)", flags=re.MULTILINE)
 re_whitespace = re.compile(r"[\t ]+")
 re_all_whitespace = re.compile(r"[\t \r\n]+")

From 330b0ad2ddf39eb5b4b4bc30072d0fe46ac71af6 Mon Sep 17 00:00:00 2001
From: Shinon <contakt@shinon71.moe>
Date: Mon, 17 Nov 2025 13:40:23 +0800
Subject: [PATCH 7/8] Fix tests

---
 markdownify/__init__.py        | 23 ++++++++---------------
 tests/test_custom_converter.py |  4 ++--
 tests/test_escaping.py         |  2 --
 tests/types.py                 |  7 +++----
 tests/utils.py                 |  5 +++--
 5 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 75058ec..7fe47f6 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -173,7 +173,7 @@ def should_remove_whitespace_inside(el: LexborNode | None):
     """Return to remove whitespace immediately inside a block-level element."""
     if not el or not el.tag:
         return False
-    if is_header_tag(el.tag) is not None:
+    if is_header_tag(el.tag):
         return True
     return el.tag in WHITESPACE_ABLE
 
@@ -212,19 +212,10 @@ def _is_block_content_element(el: LexborNode | None):
         return False
 
 
-def _prev_block_content_sibling(el):
-    """Returns the first previous sibling that is a content element, else None."""
-    while el is not None:
-        el = el.previous_sibling
-        if _is_block_content_element(el):
-            return el
-    return None
-
-
-def _next_block_content_sibling(el):
+def _next_block_content_sibling(el:LexborNode|None):
     """Returns the first next sibling that is a content element, else None."""
     while el is not None:
-        el = el.next_sibling
+        el = el.next
         if _is_block_content_element(el):
             return el
     return None
@@ -329,7 +320,7 @@ def _can_ignore(el: LexborNode):
                 raise ValueError("Unexpected element type: %s" % type(el))
 
         children_to_convert = [
-            el for el in el.iter(include_text=True) if not _can_ignore(el) and el != el
+            el for el in el.iter(include_text=True) if not _can_ignore(el)
         ]
 
         # Create a copy of this tag's parent context, then update it to include this tag
@@ -398,7 +389,9 @@ def _can_ignore(el: LexborNode):
         return text
 
     def convert__document_(self, el: LexborNode, text: str, parent_tags: set[str]):
-        """Final document-level formatting for BeautifulSoup object (node.name == "[document]")"""
+        """Final document-level formatting for lexbor (node.tag == "[document]")"""
+        # XXX(review): this document-level hook may be unnecessary with lexbor; confirm before removing.
+
         if self.options["strip_document"] == LSTRIP:
             text = text.lstrip("\n")  # remove leading separation newlines
         elif self.options["strip_document"] == RSTRIP:
@@ -719,7 +712,7 @@ def convert_list(self, el: LexborNode, text: str, parent_tags: set[str]):
 
         before_paragraph = False
         next_sibling = _next_block_content_sibling(el)
-        if next_sibling and next_sibling.name not in ["ul", "ol"]:
+        if next_sibling and next_sibling.tag not in ["ul", "ol"]:
             before_paragraph = True
         if "li" in parent_tags:
             # remove trailing newline if we're in a nested list
diff --git a/tests/test_custom_converter.py b/tests/test_custom_converter.py
index 00a83fc..51b1170 100644
--- a/tests/test_custom_converter.py
+++ b/tests/test_custom_converter.py
@@ -1,5 +1,5 @@
 from markdownify import MarkdownConverter
-from bs4 import BeautifulSoup
+from selectolax.lexbor import LexborHTMLParser
 
 
 class UnitTestConverter(MarkdownConverter):
@@ -40,5 +40,5 @@ def md(html, **options):
 
 def test_soup():
     html = '<b>test</b>'
-    soup = BeautifulSoup(html, 'html.parser')
+    soup = LexborHTMLParser(html)
     assert MarkdownConverter().convert_soup(soup) == '**test**'
diff --git a/tests/test_escaping.py b/tests/test_escaping.py
index bab4d11..af828e4 100644
--- a/tests/test_escaping.py
+++ b/tests/test_escaping.py
@@ -1,5 +1,4 @@
 import warnings
-from bs4 import MarkupResemblesLocatorWarning
 from .utils import md
 
 
@@ -32,7 +31,6 @@ def test_single_escaping_entities():
 
 def test_misc():
     # ignore the bs4 warning that "1.2" or "*" looks like a filename
-    warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
 
     assert md('\\*', escape_misc=True) == r'\\\*'
     assert md('&lt;foo>', escape_misc=True) == r'\<foo\>'
diff --git a/tests/types.py b/tests/types.py
index 7424978..90951de 100644
--- a/tests/types.py
+++ b/tests/types.py
@@ -1,5 +1,5 @@
 from markdownify import markdownify, ASTERISK, BACKSLASH, LSTRIP, RSTRIP, SPACES, STRIP, UNDERLINED, UNDERSCORE, MarkdownConverter
-from bs4 import BeautifulSoup
+from selectolax.lexbor import LexborHTMLParser, LexborNode
 from typing import Union
 
 markdownify("<p>Hello</p>") == "Hello"  # test default of STRIP
@@ -11,7 +11,6 @@
 # default options
 MarkdownConverter(
     autolinks=True,
-    bs4_options='html.parser',
     bullets='*+-',
     code_language='',
     code_language_callback=None,
@@ -55,11 +54,11 @@
 ).convert("")
 
 html = '<b>test</b>'
-soup = BeautifulSoup(html, 'html.parser')
+soup = LexborHTMLParser(html)
 MarkdownConverter().convert_soup(soup) == '**test**'
 
 
-def callback(el: BeautifulSoup) -> Union[str, None]:
+def callback(el: LexborNode) -> Union[str, None]:
     return el['class'][0] if el.has_attr('class') else None
 
 
diff --git a/tests/utils.py b/tests/utils.py
index 0dac580..8e455d5 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,9 +1,10 @@
 from markdownify import MarkdownConverter
+from selectolax.lexbor import LexborHTMLParser
 
 
 # for unit testing, disable document-level stripping by default so that
 # separation newlines are included in testing
-def md(html, **options):
+def md(html: str, **options):
     options = {"strip_document": None, **options}
 
-    return MarkdownConverter(**options).convert(html)
+    return MarkdownConverter(**options).convert_soup(LexborHTMLParser(html).body)

From ed93b1e2691658629fe39fa89c9d17351748c6a2 Mon Sep 17 00:00:00 2001
From: Shinon <contakt@shinon71.moe>
Date: Mon, 17 Nov 2025 13:46:54 +0800
Subject: [PATCH 8/8] Fix failing tests

---
 markdownify/__init__.py | 6 +++---
 tests/utils.py          | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 7fe47f6..bf2de77 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -330,7 +330,7 @@ def _can_ignore(el: LexborNode):
 
         # if this tag is a heading or table cell, add an '_inline' parent pseudo-tag
         if (
-            (node_tag and is_header_tag(node_tag) is not None)  # headings
+            (node_tag and is_header_tag(node_tag))  # headings
             or node_tag in {"td", "th"}  # table cells
         ):
             parent_tags_for_children.add("_inline")
@@ -464,8 +464,8 @@ def get_conv_fn(self, tag_name: str):
             return convert_fn
 
         # If tag is any heading, handle with convert_hN() function
-        match = is_header_tag(tag_name)
-        if match:
+        is_header = is_header_tag(tag_name)
+        if is_header:
             n = int(tag_name[1:])  # get value of N from <hN>
             return lambda el, text, parent_tags: self.convert_hN(
                 n, el, text, parent_tags
diff --git a/tests/utils.py b/tests/utils.py
index 8e455d5..83837f6 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,5 +1,4 @@
 from markdownify import MarkdownConverter
-from selectolax.lexbor import LexborHTMLParser
 
 
 # for unit testing, disable document-level stripping by default so that
@@ -7,4 +6,4 @@
 def md(html: str, **options):
     options = {"strip_document": None, **options}
 
-    return MarkdownConverter(**options).convert_soup(LexborHTMLParser(html).body)
+    return MarkdownConverter(**options).convert(html)