diff --git a/CHANGES.md b/CHANGES.md index 11db62d5..60c03486 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,7 @@ - [pull #639] Fix middle-word-em interfering with strongs (#637) - [pull #640] Fix code friendly extra stopping other syntax being processed (#638) +- [pull #644] Fix a number of em/strong issues (#641, #642, #643) ## python-markdown2 2.5.4 diff --git a/lib/markdown2.py b/lib/markdown2.py index 488b24cc..8b99ec3d 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1988,14 +1988,48 @@ def _encode_code(self, text: str) -> str: self._code_table[text] = hashed return hashed - _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]?)(?<=\S)\1", re.S) + _strong_re = re.compile(r''' + (?:_{1,}|\*{1,})? # ignore any leading em chars because we want to wrap `<strong>` as tightly around the text as possible + # eg: `***abc***` -> `*<strong>abc</strong>*` instead of `<strong>*abc*</strong>` + # Makes subsequent <em> processing easier + (\*\*|__)(?=\S) # strong syntax - must be followed by a non whitespace char + (.+?) # the strong text itself + (?<=\S)\1 # closing syntax - must be preceded by non whitespace char + ''', + re.S | re.X + ) _em_re = re.compile(r"(\*|_)(?=\S)(.*?\S)\1", re.S) @mark_stage(Stage.ITALIC_AND_BOLD) def _do_italics_and_bold(self, text: str) -> str: + def sub(match: re.Match): + ''' + regex sub function that checks that the match isn't matching across spans. + The span shouldn't be across a closing or opening HTML tag, although spans within + the span is acceptable. 
+ ''' + contents: str = match.group(2) + # the strong re also checks for leading em chars, so the match may cover some additional text + prefix = match.string[match.start(): match.regs[1][0]] + # look for all possible span HTML tags + for tag in re.findall(rf'abcdef_`, which is across 2 spans + close_index = contents.find(f'{contents}' + # must go first: - text = self._strong_re.sub(r"\2", text) - text = self._em_re.sub(r"\2", text) + text = self._strong_re.sub(sub, text) + text = self._em_re.sub(sub, text) return text _block_quote_base = r''' @@ -3320,7 +3354,7 @@ def __init__(self, md: Markdown, options: Union[dict, bool, None]): self.middle_word_em_re = re.compile( r''' (?This is strong and em.

+

This is strong and em.

-

So is this word.

+

So is this word.

-

This is strong and em.

+

This is strong and em.

-

So is this word.

+

So is this word.

diff --git a/test/tm-cases/consecutive_strong_and_em.html b/test/tm-cases/consecutive_strong_and_em.html new file mode 100644 index 00000000..6478dd07 --- /dev/null +++ b/test/tm-cases/consecutive_strong_and_em.html @@ -0,0 +1 @@ +

strongemstrong

diff --git a/test/tm-cases/consecutive_strong_and_em.text b/test/tm-cases/consecutive_strong_and_em.text new file mode 100644 index 00000000..663723f9 --- /dev/null +++ b/test/tm-cases/consecutive_strong_and_em.text @@ -0,0 +1 @@ +**strong***em***strong** diff --git a/test/tm-cases/ems_across_spans.html b/test/tm-cases/ems_across_spans.html new file mode 100644 index 00000000..daef521f --- /dev/null +++ b/test/tm-cases/ems_across_spans.html @@ -0,0 +1 @@ +

_confusing ident is _confusing

diff --git a/test/tm-cases/ems_across_spans.text b/test/tm-cases/ems_across_spans.text new file mode 100644 index 00000000..40cd465c --- /dev/null +++ b/test/tm-cases/ems_across_spans.text @@ -0,0 +1 @@ +**_confusing** ident is **_confusing** \ No newline at end of file diff --git a/test/tm-cases/middle_word_em_issue641.html b/test/tm-cases/middle_word_em_issue641.html new file mode 100644 index 00000000..39886631 --- /dev/null +++ b/test/tm-cases/middle_word_em_issue641.html @@ -0,0 +1,3 @@ +

Strong (em)

+ +

note:this is good, but this is not

diff --git a/test/tm-cases/middle_word_em_issue641.opts b/test/tm-cases/middle_word_em_issue641.opts new file mode 100644 index 00000000..f1455c41 --- /dev/null +++ b/test/tm-cases/middle_word_em_issue641.opts @@ -0,0 +1 @@ +{'extras': {'middle-word-em': False}} \ No newline at end of file diff --git a/test/tm-cases/middle_word_em_issue641.text b/test/tm-cases/middle_word_em_issue641.text new file mode 100644 index 00000000..b14e5d28 --- /dev/null +++ b/test/tm-cases/middle_word_em_issue641.text @@ -0,0 +1,3 @@ +**Strong** (*em*) + +note:*this is good*, but *this is not* \ No newline at end of file diff --git a/test/tm-cases/middle_word_em_with_extra_ems.html b/test/tm-cases/middle_word_em_with_extra_ems.html index a86b1932..a8974039 100644 --- a/test/tm-cases/middle_word_em_with_extra_ems.html +++ b/test/tm-cases/middle_word_em_with_extra_ems.html @@ -2,7 +2,7 @@

one_two_three

-

one_two_three

+

one_two_three

one_two_three