diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index a54cbae8..e642ccdd 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -27,3 +27,6 @@ jobs: - name: Test run: | make testone + - name: Test ReDoS + run: | + make testredos diff --git a/CHANGES.md b/CHANGES.md index 26cde7b0..192d879d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,6 +8,7 @@ - [pull #626] Fix XSS when encoding incomplete tags (#625) - [pull #628] Fix TypeError in MiddleWordEm extra when options was None (#627) - [pull #630] Fix nbsp breaking tables (#629) +- [pull #634] Fix ReDoS in HTML tokenizer regex (#633) ## python-markdown2 2.5.3 diff --git a/Makefile b/Makefile index b6f88c9b..d2a9a72f 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,10 @@ test: testone: cd test && python test.py -- -knownfailure +.PHONY: testredos +testredos: + python test/test_redos.py + .PHONY: pygments pygments: [[ -d deps/pygments ]] || ( \ diff --git a/lib/markdown2.py b/lib/markdown2.py index 265bb7d1..067fd0ab 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1273,7 +1273,7 @@ def _run_span_gamut(self, text: str) -> str: \s+ # whitespace after tag (?:[^\t<>"'=/]+:)? [^<>"'=/]+= # attr name - (?:".*?"|'.*?'|[^<>"'=/\s]+) # value, quoted or unquoted. If unquoted, no spaces allowed + (?:"[^"]*?"|'[^']*?'|[^<>"'=/\s]+) # value, quoted or unquoted. If unquoted, no spaces allowed )* \s*/?> | diff --git a/test/test_redos.py b/test/test_redos.py new file mode 100644 index 00000000..180ce931 --- /dev/null +++ b/test/test_redos.py @@ -0,0 +1,90 @@ +import logging +import subprocess +import sys +import time +from pathlib import Path + +log = logging.getLogger("test") +LIB_DIR = Path(__file__).parent.parent / "lib" + + +def pull_387_example_1(): + # https://github.com/trentm/python-markdown2/pull/387 + return "[#a" + " " * 3456 + + +def pull_387_example_2(): + # https://github.com/trentm/python-markdown2/pull/387 + return "```" + "\n" * 3456 + + +def pull_387_example_3(): + # https://github.com/trentm/python-markdown2/pull/387 + return "-*-" + " " * 3456 + + +def pull_402(): + # https://github.com/trentm/python-markdown2/pull/402 + return " " * 100_000 + "$" + + +def issue493(): + # https://github.com/trentm/python-markdown2/issues/493 + return "**_" + "*_" * 38730 * 10 + "\x00" + + +def issue_633(): + # https://github.com/trentm/python-markdown2/issues/633 + return '