From 9e810d517f4fb20f5febdfe9fa6d5f910fa83f72 Mon Sep 17 00:00:00 2001
From: ExcitingFrog <398303159@qq.com>
Date: Fri, 5 Sep 2025 16:43:02 +0800
Subject: [PATCH] Add rowspan support for HTML tables
---
markdownify/__init__.py | 250 ++++++++++++++++++++++++++++++++--------
tests/test_tables.py | 151 ++++++++++++++++++++++--
2 files changed, 344 insertions(+), 57 deletions(-)
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 148d340..3d8c1a4 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -21,7 +21,8 @@
# Extract (leading_nl, content, trailing_nl) from a string
# (functionally equivalent to r'^(\n*)(.*?)(\n*)$', but greedy is faster than reluctant here)
-re_extract_newlines = re.compile(r'^(\n*)((?:.*[^\n])?)(\n*)$', flags=re.DOTALL)
+re_extract_newlines = re.compile(
+ r'^(\n*)((?:.*[^\n])?)(\n*)$', flags=re.DOTALL)
# Escape miscellaneous special Markdown characters
re_escape_misc_chars = re.compile(r'([]\\&<`[>~=+|])')
@@ -100,6 +101,7 @@ def abstract_inline_conversion(markup_fn):
the text if it looks like an HTML tag. markup_fn is necessary to allow for
references to self.strong_em_symbol etc.
"""
+
def implementation(self, el, text, parent_tags):
markup_prefix = markup_fn(self)
if markup_prefix.startswith('<') and markup_prefix.endswith('>'):
@@ -213,7 +215,8 @@ def __init__(self, **options):
# If a string or list is passed to bs4_options, assume it is a 'features' specification
if not isinstance(self.options['bs4_options'], dict):
- self.options['bs4_options'] = {'features': self.options['bs4_options']}
+ self.options['bs4_options'] = {
+ 'features': self.options['bs4_options']}
# Initialize the conversion function cache
self.convert_fn_cache = {}
@@ -265,7 +268,8 @@ def _can_ignore(el):
else:
raise ValueError('Unexpected element type: %s' % type(el))
- children_to_convert = [el for el in node.children if not _can_ignore(el)]
+ children_to_convert = [
+ el for el in node.children if not _can_ignore(el)]
# Create a copy of this tag's parent context, then update it to include this tag
# to propagate down into the children.
@@ -301,17 +305,21 @@ def _can_ignore(el):
updated_child_strings = [''] # so the first lookback works
for child_string in child_strings:
# Separate the leading/trailing newlines from the content.
- leading_nl, content, trailing_nl = re_extract_newlines.match(child_string).groups()
+ leading_nl, content, trailing_nl = re_extract_newlines.match(
+ child_string).groups()
# If the last child had trailing newlines and this child has leading newlines,
# use the larger newline count, limited to 2.
if updated_child_strings[-1] and leading_nl:
- prev_trailing_nl = updated_child_strings.pop() # will be replaced by the collapsed value
- num_newlines = min(2, max(len(prev_trailing_nl), len(leading_nl)))
+ # will be replaced by the collapsed value
+ prev_trailing_nl = updated_child_strings.pop()
+ num_newlines = min(
+ 2, max(len(prev_trailing_nl), len(leading_nl)))
leading_nl = '\n' * num_newlines
# Add the results to the updated child string list.
- updated_child_strings.extend([leading_nl, content, trailing_nl])
+ updated_child_strings.extend(
+ [leading_nl, content, trailing_nl])
child_strings = updated_child_strings
@@ -332,11 +340,13 @@ def convert__document_(self, el, text, parent_tags):
elif self.options['strip_document'] == RSTRIP:
text = text.rstrip('\n') # remove trailing separation newlines
elif self.options['strip_document'] == STRIP:
- text = text.strip('\n') # remove leading and trailing separation newlines
+ # remove leading and trailing separation newlines
+ text = text.strip('\n')
elif self.options['strip_document'] is None:
pass # leave leading and trailing separation newlines as-is
else:
- raise ValueError('Invalid value for strip_document: %s' % self.options['strip_document'])
+ raise ValueError('Invalid value for strip_document: %s' %
+ self.options['strip_document'])
return text
@@ -391,7 +401,8 @@ def get_conv_fn(self, tag_name):
return None
# Look for an explicitly defined conversion function by tag name first
- convert_fn_name = "convert_%s" % re_make_convert_fn_name.sub("_", tag_name)
+ convert_fn_name = "convert_%s" % re_make_convert_fn_name.sub(
+ "_", tag_name)
convert_fn = getattr(self, convert_fn_name, None)
if convert_fn:
return convert_fn
@@ -455,7 +466,8 @@ def convert_a(self, el, text, parent_tags):
title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text
- convert_b = abstract_inline_conversion(lambda self: 2 * self.options['strong_em_symbol'])
+ convert_b = abstract_inline_conversion(
+ lambda self: 2 * self.options['strong_em_symbol'])
def convert_blockquote(self, el, text, parent_tags):
# handle some early-exit scenarios
@@ -492,7 +504,8 @@ def convert_code(self, el, text, parent_tags):
# Find the maximum number of consecutive backticks in the text, then
# delimit the code span with one more backtick than that
- max_backticks = max((len(match) for match in re.findall(re_backtick_runs, text)), default=0)
+ max_backticks = max(
+ (len(match) for match in re.findall(re_backtick_runs, text)), default=0)
markup_delimiter = '`' * (max_backticks + 1)
# If the maximum number of backticks is greater than zero, add a space
@@ -514,7 +527,8 @@ def convert_div(self, el, text, parent_tags):
convert_section = convert_div
- convert_em = abstract_inline_conversion(lambda self: self.options['strong_em_symbol'])
+ convert_em = abstract_inline_conversion(
+ lambda self: self.options['strong_em_symbol'])
convert_kbd = convert_code
@@ -691,7 +705,8 @@ def convert_pre(self, el, text, parent_tags):
code_language = self.options['code_language']
if self.options['code_language_callback']:
- code_language = self.options['code_language_callback'](el) or code_language
+ code_language = self.options['code_language_callback'](
+ el) or code_language
if self.options['strip_pre'] == STRIP:
text = strip_pre(text) # remove all leading/trailing newlines
@@ -700,7 +715,8 @@ def convert_pre(self, el, text, parent_tags):
elif self.options['strip_pre'] is None:
pass # leave leading and trailing newlines as-is
else:
- raise ValueError('Invalid value for strip_pre: %s' % self.options['strip_pre'])
+ raise ValueError('Invalid value for strip_pre: %s' %
+ self.options['strip_pre'])
return '\n\n```%s\n%s\n```\n\n' % (code_language, text)
@@ -719,9 +735,11 @@ def convert_style(self, el, text, parent_tags):
convert_samp = convert_code
- convert_sub = abstract_inline_conversion(lambda self: self.options['sub_symbol'])
+ convert_sub = abstract_inline_conversion(
+ lambda self: self.options['sub_symbol'])
- convert_sup = abstract_inline_conversion(lambda self: self.options['sup_symbol'])
+ convert_sup = abstract_inline_conversion(
+ lambda self: self.options['sup_symbol'])
def convert_table(self, el, text, parent_tags):
return '\n\n' + text.strip() + '\n\n'
@@ -745,49 +763,183 @@ def convert_th(self, el, text, parent_tags):
return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
def convert_tr(self, el, text, parent_tags):
+        """Render a ``<tr>`` as a Markdown table row plus any header rule lines.
+
+        ``text`` is the already-converted content of the row's cells. When an
+        earlier row of the same table carries a ``rowspan`` that reaches into
+        this row, the placeholder cells computed by ``_build_rowspan_cells``
+        are put in front of ``text`` and are counted as columns when the
+        header/separator lines are generated.
+
+        Returns the optional empty-header lines, the row itself and the
+        optional ``---`` underline, concatenated into one string.
+        """
+        rowspan_prefix = ''
+        table = el.find_parent('table')
+        if table is not None:
+            # Read the cache from the instance dict: attribute access on a
+            # Tag falls back to a child-tag search for unknown names.
+            prefixes = vars(table).get('_md_rowspan_cells')
+            if prefixes is None:
+                # Scan the table once per table instead of once per row;
+                # an empty tuple records "this table has no rowspan".
+                if self._table_has_rowspan(table):
+                    self._build_rowspan_cells(table)
+                else:
+                    table._md_rowspan_cells = ()
+                prefixes = table._md_rowspan_cells
+            if prefixes:
+                # Find this row by identity. list.index() would compare
+                # Tags by their markup and return the first look-alike row.
+                row_idx = next(
+                    idx for idx, row in enumerate(table.find_all('tr'))
+                    if row is el)
+                rowspan_prefix = prefixes[row_idx]
         cells = el.find_all(['td', 'th'])
         is_first_row = el.find_previous_sibling() is None
         is_headrow = (
             all([cell.name == 'th' for cell in cells])
             or (el.parent.name == 'thead'
                 # avoid multiple tr in thead
                 and len(el.parent.find_all('tr')) == 1)
         )
         is_head_row_missing = (
             (is_first_row and not el.parent.name == 'tbody')
             or (is_first_row and el.parent.name == 'tbody' and len(el.parent.parent.find_all(['thead'])) < 1)
         )
         overline = ''
         underline = ''
         full_colspan = 0
         for cell in cells:
             if 'colspan' in cell.attrs and cell['colspan'].isdigit():
                 full_colspan += max(1, min(1000, int(cell['colspan'])))
             else:
                 full_colspan += 1
+        # Placeholder cells inserted for rowspans are columns too.
+        full_colspan += rowspan_prefix.count(' |')
         if ((is_headrow
              or (is_head_row_missing
                  and self.options['table_infer_header']))
                 and is_first_row):
             # first row and:
             # - is headline or
             # - headline is missing and header inference is enabled
             # print headline underline
             underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
         elif ((is_head_row_missing
                and not self.options['table_infer_header'])
               or (is_first_row
                   and (el.parent.name == 'table'
                        or (el.parent.name == 'tbody'
                            and not el.parent.find_previous_sibling())))):
             # headline is missing and header inference is disabled or:
             # first row, not headline, and:
             # - the parent is table or
             # - the parent is tbody at the beginning of a table.
             # print empty headline above this row
             overline += '| ' + ' | '.join([''] * full_colspan) + ' |' + '\n'
             overline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
-        return overline + '|' + text + '\n' + underline
+        return overline + '|' + rowspan_prefix + text + '\n' + underline
+
+    def _build_rowspan_cells(self, table):
+        """Cache, per row, the leading placeholder cells implied by rowspans.
+
+        Fills ``table._md_rowspan_cells`` with one string per ``<tr>`` returned
+        by ``table.find_all('tr')``, in that order. Each string is a run of
+        ``' |'``: one for every column, counted contiguously from column 0,
+        that a ``rowspan`` cell from an earlier row still covers.
+
+        NOTE(review): a covered column that is not part of that leading run
+        is stepped over when positioning the row's own cells but gets no
+        placeholder, so a rowspan to the right of a regular cell is not
+        reflected in the output. Also, find_all presumably descends into
+        nested tables, whose rows would then share this grid -- confirm.
+        """
+        rows = table.find_all('tr')
+        row_total = len(rows)
+
+        # row index -> columns still covered by a rowspan from a row above
+        covered_by_row = {}
+
+        table._md_rowspan_cells = []
+
+        for row_idx, row in enumerate(rows):
+            covered = covered_by_row.get(row_idx, frozenset())
+
+            # Length of the unbroken run of covered columns starting at 0.
+            column = 0
+            while column in covered:
+                column += 1
+            table._md_rowspan_cells.append(' |' * column)
+
+            # Place this row's own cells and record what they cover below.
+            for cell in row.find_all(['td', 'th']):
+                # Step over columns taken by a rowspan from above.
+                while column in covered:
+                    column += 1
+
+                # Same parsing rule for both attributes: digits only,
+                # clamped to 1..1000, anything else counts as 1.
+                spans = {'colspan': 1, 'rowspan': 1}
+                for attr in spans:
+                    if attr in cell.attrs and cell[attr].isdigit():
+                        spans[attr] = max(1, min(1000, int(cell[attr])))
+
+                cell_columns = range(column, column + spans['colspan'])
+                # A rowspan running past the last row is cut off there.
+                last_row = min(row_idx + spans['rowspan'], row_total)
+                for later_row in range(row_idx + 1, last_row):
+                    covered_by_row.setdefault(later_row, set()).update(cell_columns)
+
+                column += spans['colspan']
+
+    def _table_has_rowspan(self, table):
+        """Tell whether some ``td``/``th`` in the table has a rowspan above 1."""
+        for cell in table.find_all(['td', 'th']):
+            raw_span = cell.get('rowspan')
+            if not raw_span:
+                continue
+            try:
+                spans_rows = int(raw_span) > 1
+            except (ValueError, TypeError):
+                # Not an integer value: treat the cell as not spanning rows.
+                continue
+            if spans_rows:
+                return True
+        return False
def markdownify(html, **options):
diff --git a/tests/test_tables.py b/tests/test_tables.py
index 7e0670c..238395b 100644
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -283,11 +283,126 @@
94 |
"""
+# Rowspan test tables
+table_with_simple_rowspan = """
+
+ | Name |
+ Department |
+ Age |
+
+
+ | John |
+ IT |
+ 30 |
+
+
+ | Management |
+ 31 |
+
+
+ | Jane |
+ HR |
+ 28 |
+
+
"""
+
+table_with_complex_rowspan = """
+
+ | Category |
+ Item |
+ Price |
+ Stock |
+
+
+ | Electronics |
+ Phone |
+ $500 |
+ 10 |
+
+
+ | Laptop |
+ $1000 |
+ 5 |
+
+
+ | Tablet |
+ $300 |
+ 8 |
+
+
+ | Books |
+ Novel |
+ $15 |
+ 20 |
+
+
"""
+
+table_with_rowspan_and_colspan = """
+
+ | Product Info |
+ Details |
+
+
+ | Electronics |
+ Phone |
+ Latest model |
+
+
+ | Laptop |
+ High performance |
+
+
"""
+
+table_with_multiple_rowspan = """
+
+ | Region |
+ Country |
+ City |
+ Population |
+
+
+ | Asia |
+ China |
+ Beijing |
+ 21M |
+
+
+ | Shanghai |
+ 24M |
+
+
+ | Europe |
+ France |
+ Paris |
+ 2M |
+
+
"""
+
+table_with_thead_rowspan = """
+
+
+ | Name |
+ Contact |
+
+
+ | Email |
+ Phone |
+
+
+
+
+ | John |
+ john@example.com |
+ 123-456 |
+
+
+
"""
def test_table():
assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
+ assert md(
+ table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_paragraphs) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
@@ -299,23 +414,43 @@ def test_table():
assert md(table_body) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n'
assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
+ assert md(
+ table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
assert md(table_with_colspan_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Name | | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_simple_rowspan) == '\n\n| Name | Department | Age |\n| --- | --- | --- |\n| John | IT | 30 |\n| | Management | 31 |\n| Jane | HR | 28 |\n\n'
+ assert md(table_with_complex_rowspan) == '\n\n| Category | Item | Price | Stock |\n| --- | --- | --- | --- |\n| Electronics | Phone | $500 | 10 |\n| | Laptop | $1000 | 5 |\n| | Tablet | $300 | 8 |\n| Books | Novel | $15 | 20 |\n\n'
+ assert md(table_with_rowspan_and_colspan) == '\n\n| Product Info | | Details |\n| --- | --- | --- |\n| Electronics | Phone | Latest model |\n| | Laptop | High performance |\n\n'
+ assert md(table_with_multiple_rowspan) == '\n\n| Region | Country | City | Population |\n| --- | --- | --- | --- |\n| Asia | China | Beijing | 21M |\n| | | Shanghai | 24M |\n| Europe | France | Paris | 2M |\n\n'
+ assert md(table_with_thead_rowspan) == '\n\n| Name | Contact | |\n| --- | --- | --- |\n| | Email | Phone |\n| John | john@example.com | 123-456 |\n\n'
def test_table_infer_header():
+ """Check table conversion with table_infer_header=True, including the rowspan fixtures."""
assert md(table, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_html_content, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_html_content,
+ table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_paragraphs, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_linebreaks, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
- assert md(table_with_header_column, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_header_column,
+ table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_head_body, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_head_body_multiple_head, table_infer_header=True) == '\n\n| Creator | Editor | Server |\n| --- | --- | --- |\n| Operator | Manager | Engineer |\n| Bob | Oliver | Tom |\n| Thomas | Lucas | Ethan |\n\n'
- assert md(table_head_body_missing_head, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_head_body_missing_head,
+ table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_missing_text, table_infer_header=True) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_missing_head, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_body, table_infer_header=True) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
+ assert md(table_with_caption,
+ table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
- assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
- assert md(table_with_colspan_missing_head, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_undefined_colspan,
+ table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
+ assert md(table_with_colspan_missing_head,
+ table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_simple_rowspan,
+ table_infer_header=True) == '\n\n| Name | Department | Age |\n| --- | --- | --- |\n| John | IT | 30 |\n| | Management | 31 |\n| Jane | HR | 28 |\n\n'
+ assert md(table_with_complex_rowspan, table_infer_header=True) == '\n\n| Category | Item | Price | Stock |\n| --- | --- | --- | --- |\n| Electronics | Phone | $500 | 10 |\n| | Laptop | $1000 | 5 |\n| | Tablet | $300 | 8 |\n| Books | Novel | $15 | 20 |\n\n'
+ assert md(table_with_rowspan_and_colspan,
+ table_infer_header=True) == '\n\n| Product Info | | Details |\n| --- | --- | --- |\n| Electronics | Phone | Latest model |\n| | Laptop | High performance |\n\n'
+ assert md(table_with_multiple_rowspan, table_infer_header=True) == '\n\n| Region | Country | City | Population |\n| --- | --- | --- | --- |\n| Asia | China | Beijing | 21M |\n| | | Shanghai | 24M |\n| Europe | France | Paris | 2M |\n\n'
+ assert md(table_with_thead_rowspan,
+ table_infer_header=True) == '\n\n| Name | Contact | |\n| --- | --- | --- |\n| | Email | Phone |\n| John | john@example.com | 123-456 |\n\n'