From 6552a1e09cd8d49738ea42abff558c5766f550e1 Mon Sep 17 00:00:00 2001
From: Joseph Myers <josmyers@redhat.com>
Date: Wed, 5 Feb 2025 22:15:12 +0000
Subject: [PATCH] Avoid stripping nonbreaking spaces

Nonbreaking spaces should be preserved in places such as the start of
a paragraph or blockquote, so change several `strip()` / `lstrip()`
calls to strip only the ASCII characters `' \t\r\n'` (space, tab,
carriage return, line feed).  There may be other places that should
also avoid stripping nonbreaking spaces, and, conversely, places where
*trailing* nonbreaking spaces could safely be stripped even though
this change now preserves them, but this seems a reasonable starting
point for fixing issues in this area.
---
 markdownify/__init__.py   | 10 +++++-----
 tests/test_conversions.py |  2 ++
 2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 9e4c99f..4f9f001 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -265,7 +265,7 @@ def process_text(self, el):
         if (should_remove_whitespace_outside(el.previous_sibling)
                 or (should_remove_whitespace_inside(el.parent)
                     and not el.previous_sibling)):
-            text = text.lstrip()
+            text = text.lstrip(' \t\r\n')
         if (should_remove_whitespace_outside(el.next_sibling)
                 or (should_remove_whitespace_inside(el.parent)
                     and not el.next_sibling)):
@@ -351,7 +351,7 @@ def convert_a(self, el, text, convert_as_inline):
 
     def convert_blockquote(self, el, text, convert_as_inline):
         # handle some early-exit scenarios
-        text = (text or '').strip()
+        text = (text or '').strip(' \t\r\n')
         if convert_as_inline:
             return ' ' + text + ' '
         if not text:
@@ -525,8 +525,8 @@ def _indent_for_li(match):
 
     def convert_p(self, el, text, convert_as_inline):
         if convert_as_inline:
-            return ' ' + text.strip() + ' '
-        text = text.strip()
+            return ' ' + text.strip(' \t\r\n') + ' '
+        text = text.strip(' \t\r\n')
         if self.options['wrap']:
             # Preserve newlines (and preceding whitespace) resulting
             # from <br> tags.  Newlines in the input have already been
@@ -535,7 +535,7 @@ def convert_p(self, el, text, convert_as_inline):
                 lines = text.split('\n')
                 new_lines = []
                 for line in lines:
-                    line = line.lstrip()
+                    line = line.lstrip(' \t\r\n')
                     line_no_trailing = line.rstrip()
                     trailing = line[len(line_no_trailing):]
                     line = fill(line,
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 1739cb9..e851ac2 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -59,6 +59,7 @@ def test_b_spaces():
 def test_blockquote():
     assert md('<blockquote>Hello</blockquote>') == '\n> Hello\n\n'
     assert md('<blockquote>\nHello\n</blockquote>') == '\n> Hello\n\n'
+    assert md('<blockquote>&nbsp;Hello</blockquote>') == '\n> \u00a0Hello\n\n'
 
 
 def test_blockquote_with_nested_paragraph():
@@ -266,6 +267,7 @@ def test_p():
     assert md('<p>1234 5678 9012<br />67890</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n1234 5678\n9012\\\n67890\n\n'
     assert md('<p>1234 5678 9012<br />67890</p>', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n1234 5678\n9012  \n67890\n\n'
     assert md('First<p>Second</p><p>Third</p>Fourth') == 'First\n\nSecond\n\nThird\n\nFourth'
+    assert md('<p>&nbsp;x y</p>', wrap=True, wrap_width=80) == '\n\n\u00a0x y\n\n'
 
 
 def test_pre():