From dd06cd953bff680aee8f92acfe70233f42c106aa Mon Sep 17 00:00:00 2001
From: Duncan Findlay <duncf@google.com>
Date: Fri, 14 Jul 2017 17:31:46 -0700
Subject: [PATCH] Improve and simplify the CommaProxyList algorithm.

With this change CommaProxyList better handles and preserves non-standard (and
even inconsistent) indentation, rather than assuming indented lists are indented
by four spaces (fixes #100). It also is significantly faster in very large files
by avoiding use of ".indentation", which can require iterating over every
element in the file. End-of-list comments are preserved at the end of the list,
and inline comments are also preserved.
---
 redbaron/base_nodes.py   | 177 +++++++++++++++++++++++++++------------
 tests/test_proxy_list.py | 131 +++++++++++++++++++++--------
 2 files changed, 221 insertions(+), 87 deletions(-)

diff --git a/redbaron/base_nodes.py b/redbaron/base_nodes.py
index ff63540b..cb51d070 100644
--- a/redbaron/base_nodes.py
+++ b/redbaron/base_nodes.py
@@ -1490,24 +1490,64 @@ def __init__(self, node_list, on_attribute="value"):
         super(CommaProxyList, self).__init__(node_list, on_attribute=on_attribute)
         self.style = "indented" if any(self.node_list('comma', recursive=False).map(lambda x: x('endl'))) else "flat"
 
-        # XXX will likely break if the user modify the formatting of the list,
-        # I don't like that
-        self.has_trailing = self.node_list and self.node_list[-1].type == "comma"
+    def _infer_middle_indentation(self):
+        """Determines how far to indent nodes for an indented list."""
+        # Find a non-trailing comma node, and use the existing indentation. This
+        # only works if we have a list with more than 2 elements.
+        for node in self.node_list[:-1]:
+            if node.type != "comma":
+              continue
+            endls = node.find_all("endl")
+            if endls:
+                return endls[-1].indent
+        # Otherwise, take the first element's indent level. This can be
+        # expensive.
+        if self.node_list:
+            return self.node_list[0].indentation
+        # Empty list, so just assume 4 spaces from parent's indent.
+        return self.parent.indentation + "    "
+
+    def _get_middle_separator(self, indent=None):
+        """Builds a new middle separator (comma).
+
+        If indentation is provided, the comma is followed by a newline with the
+        given indentation. Otherwise, it's a single space.
+        """
+        if indent is not None:
+            return redbaron.nodes.CommaNode(
+                {"type": "comma", "first_formatting": [],
+                 "second_formatting": [{
+                     "type": "endl",
+                     "indent": indent,
+                     "formatting": [], "value": "\n"}]})
+        else:
+            return redbaron.nodes.CommaNode(
+                {"type": "comma", "first_formatting": [],
+                 "second_formatting": [{"type": "space", "value": " "}]})
 
-    def _get_middle_separator(self):
-        if self.style == "indented":
-            return redbaron.nodes.CommaNode({"type": "comma", "first_formatting": [], "second_formatting": [
-                {"type": "endl", "indent": self.parent.indentation + "    ", "formatting": [], "value": "\n"}]})
+    def _split_formatting(self, node_list):
+        """Splits a list of formatting nodes on the first endl.
 
-        return redbaron.nodes.CommaNode(
-            {"type": "comma", "first_formatting": [], "second_formatting": [{"type": "space", "value": " "}]})
+        This is useful for determining which formatting nodes logically belong
+        with the following element.
+        """
+        endl_found = -1
+        for i, fnode in enumerate(node_list):
+            if fnode.type == "endl":
+                endl_found = i
+                break
+
+        # Anything *after* the first endl is associated with the end of the
+        # list.
+        if endl_found >= 0:
+            return (node_list[:i+1], node_list[i+1:])
+        return node_list, []
 
     def _generate_expected_list(self):
-        def generate_separator():
-            separator = self._get_middle_separator()
-            separator.parent = self.node_list
-            separator.on_attribute = self.on_attribute
-            return separator
+        """Converts self.data into a list of nodes."""
+
+        middle_indentation = (self._infer_middle_indentation()
+                              if self.style == "indented" else None)
 
         # XXX will break comments
         if not self.data:
@@ -1515,48 +1555,79 @@ def generate_separator():
             self.parent.second_formatting = []
             return []
 
-        expected_list = []
+        trailing_indent = None
+        end_of_list_formatting = None
+        has_trailing_comma = False
+
+        # The basic idea here is that every element of the list is stored in
+        # self.data, along with the CommaNode that follows it (if
+        # applicable). We have to turn this into a flat list of nodes, and fix
+        # the formatting of the CommaNodes so that the list looks reasonable.
+        #
+        # One of the elements of self.data may have a CommaNode that was
+        # previously the last element of the list. This will have different
+        # formatting than a CommaNode elsewhere in the list.
+        #
+        # To make things simpler, we start by converting this trailing CommaNode
+        # into a standard middle-separator (if it exists), by pulling it from
+        # self.node_list.
+        if self.node_list and self.node_list[-1].type == "comma":
+            old_trailing_comma = self.node_list[-1]
+            # Keep comments associated with the end of list for later.
+            keep, end_of_list_formatting = self._split_formatting(
+                old_trailing_comma.second_formatting)
+            if keep[-1].type == "endl":
+                # Change the indent level.
+                trailing_indent = keep[-1].indent
+                if middle_indentation:
+                    keep[-1].indent = middle_indentation
+            old_trailing_comma.second_formatting = keep
+            has_trailing_comma = True
+        elif self.node_list:
+            # We may still need to know the trailing indent, in case we add a
+            # trailing comma later.
+            endls = self.node_list[-1].find_all("endl")
+            if endls:
+                trailing_indent = endls[-1].indent
 
-        for position, i in enumerate(self.data):
-            is_last = position == len(self.data) - 1
-            expected_list.append(i[0])
-            # XXX this will need refactoring...
-            if i[1] is not None:
-                # here we encounter a middle value that should have formatting
-                # to separate between the intems but has not so we add it
-                # this happen because a new value has been added after this one
-                if not is_last and not i[1]:
-                    expected_list.append(generate_separator())
-
-                # comma list doesn't have trailing but has a comma at its end, remove it
-                elif is_last and not self.has_trailing and i[1] and i[1][0].type == "comma":
-                    # XXX this will likely break comments if presents at the end of the list
-                    pass
-                else:
-                    expected_list += i[1]
+        expected_list = []
 
-                    # XXX will break comments
-                    if self.style == "indented":
-                        if not expected_list[-1].second_formatting.endl:
-                            raise Exception(
-                                "It appears that you have indentation in your CommaList, for now RedBaron doesn't know how to handle this situation (which requires a lot of work), sorry about that. You can find more information here https://github.com/PyCQA/redbaron/issues/100")
-                        elif expected_list[-1].second_formatting.endl.indent != self.parent.indentation + " " * 4:
-                            expected_list[-1].second_formatting.endl.indent = self.parent.indentation + " " * 4
+        # Now, iterate through self.data and add things to our node list. Every
+        # element either has an associated middle-separator, or no associated
+        # formatting.
+        for position, (elem, formatting) in enumerate(self.data):
+            if formatting:
+                expected_list.append(elem)
+                expected_list.extend(formatting)
             else:
-                # here we generate the new expected formatting
-                # None is used as a sentry value for newly inserted values in the proxy list
-                if not is_last:
-                    expected_list.append(generate_separator())
-                elif self.has_trailing:
-                    expected_list.append(generate_separator())
-                    expected_list[-1].second_formatting[0].indent = ""
-
-        if expected_list and self.has_trailing and self.style == "indented":
-            if not expected_list[-1].second_formatting.endl:
-                raise Exception(
-                    "It appears that you have indentation in your CommaList, for now RedBaron doesn't know how to handle this situation (which requires a lot of work), sorry about that. You can find more information here https://github.com/PyCQA/redbaron/issues/100")
-            elif expected_list[-1].second_formatting.endl.indent != self.parent.indentation:
-                expected_list[-1].second_formatting.endl.indent = self.parent.indentation
+                if middle_indentation is not None:
+                    # Fix indentation, if applicable.
+                    endls = elem.find_all("endl")
+                    if endls:
+                        endls[-1].indent = middle_indentation
+
+                separator = self._get_middle_separator(middle_indentation)
+                separator.parent = self.node_list
+                separator.on_attribute = self.on_attribute
+                expected_list.append(elem)
+                expected_list.append(separator)
+
+        # Ok, now adjust the last element of the list, which contains a middle
+        # separator. We want to change it to be a trailing separator.
+        if expected_list and expected_list[-1].type == "comma":
+            trailing_comma = expected_list[-1]
+            if not has_trailing_comma and not trailing_comma.find("comment"):
+                # We don't want a trailing comma.
+                del expected_list[-1]
+                trailing_comma = None
+
+            if trailing_comma:
+                endls = trailing_comma.find_all("endl")
+                if endls and trailing_indent is not None:
+                    endls[-1].indent = trailing_indent
+                if end_of_list_formatting:
+                    trailing_comma.second_formatting.extend(
+                        end_of_list_formatting)
 
         return expected_list
 
diff --git a/tests/test_proxy_list.py b/tests/test_proxy_list.py
index 23bd170a..a6ea765b 100644
--- a/tests/test_proxy_list.py
+++ b/tests/test_proxy_list.py
@@ -1,5 +1,4 @@
 #!/usr/bin/python
-# -*- coding:Utf-8 -*-
 
 """ Tests the rendering feature """
 
@@ -847,7 +846,8 @@ def test_regression_help_proxy_list():
     red = RedBaron("(1, 2)")
     red[0].value.node_list.help()
 
-
+# Many of these tests have test code using 3 spaces for indenting to make sure
+# we're not relying on a hard-coded assumption of 4 spaces somewhere.
 def test_comma_proxy_list_indented_len_not_empty():
     red = RedBaron("[\n    1,\n    2,\n    3,\n]")
     comma_proxy_list = red[0].value
@@ -877,25 +877,25 @@ def test_comma_proxy_list_indented_detect_style():
 
 
 def test_comma_proxy_list_indented_insert_2_at_top():
-    red = RedBaron("[\n    1,\n]")
+    red = RedBaron("[\n   1,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.insert(0, "2")
-    assert red.dumps() == "[\n    2,\n    1,\n]"
+    assert red.dumps() == "[\n   2,\n   1,\n]"
 
 
 def test_comma_proxy_list_indented_insert_2():
-    red = RedBaron("[\n    1,\n]")
+    red = RedBaron("[\n   1,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.insert(1, "2")
     assert comma_proxy_list.style == "indented"
-    assert red.dumps() == "[\n    1,\n    2,\n]"
+    assert red.dumps() == "[\n   1,\n   2,\n]"
 
 
 def test_comma_proxy_list_indented_insert_2_middle():
-    red = RedBaron("[\n    1,\n    3,\n]")
+    red = RedBaron("[\n   1,\n   3,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.insert(1, "2")
-    assert red.dumps() == "[\n    1,\n    2,\n    3,\n]"
+    assert red.dumps() == "[\n   1,\n   2,\n   3,\n]"
 
 
 # XXX need to rethink this behavior
@@ -908,77 +908,77 @@ def test_comma_proxy_list_indented_insert_2_middle():
 
 
 def test_comma_proxy_list_indented_append_2():
-    red = RedBaron("[\n    1,\n]")
+    red = RedBaron("[\n   1,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.append("2")
-    assert red.dumps() == "[\n    1,\n    2,\n]"
+    assert red.dumps() == "[\n   1,\n   2,\n]"
 
 
 def test_comma_proxy_list_indented_pop():
-    red = RedBaron("[\n    1,\n]")
+    red = RedBaron("[\n   1,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.pop(0)
     assert red.dumps() == "[]"
 
 
 def test_comma_proxy_list_indented_pop_2_at_top():
-    red = RedBaron("[\n    2,\n    1,\n]")
+    red = RedBaron("[\n   2,\n   1,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.pop(0)
-    assert red.dumps() == "[\n    1,\n]"
+    assert red.dumps() == "[\n   1,\n]"
 
 
 def test_comma_proxy_list_indented_pop_2():
-    red = RedBaron("[\n    1,\n    2,\n]")
+    red = RedBaron("[\n   1,\n   2,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.pop(1)
-    assert red.dumps() == "[\n    1,\n]"
+    assert red.dumps() == "[\n   1,\n]"
 
 
 def test_comma_proxy_list_indented_pop_2_middle():
-    red = RedBaron("[\n    1,\n    2,\n    3,\n]")
+    red = RedBaron("[\n   1,\n   2,\n   3,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.pop(1)
-    assert red.dumps() == "[\n    1,\n    3,\n]"
+    assert red.dumps() == "[\n   1,\n   3,\n]"
 
 
 def test_comma_proxy_list_indented_pop_no_index():
-    red = RedBaron("[\n    1,\n    2,\n    3,\n]")
+    red = RedBaron("[\n   1,\n   2,\n   3,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.pop()
-    assert red.dumps() == "[\n    1,\n    2,\n]"
+    assert red.dumps() == "[\n   1,\n   2,\n]"
 
 
 def test_comma_proxy_list_indented_del():
-    red = RedBaron("[\n    1,\n]")
+    red = RedBaron("[\n   1,\n]")
     comma_proxy_list = red[0].value
     del comma_proxy_list[0]
     assert red.dumps() == "[]"
 
 
 def test_comma_proxy_list_indented_del_2_at_top():
-    red = RedBaron("[\n    2,\n    1,\n]")
+    red = RedBaron("[\n   2,\n   1,\n]")
     comma_proxy_list = red[0].value
     del comma_proxy_list[0]
-    assert red.dumps() == "[\n    1,\n]"
+    assert red.dumps() == "[\n   1,\n]"
 
 
 def test_comma_proxy_list_indented_remove():
-    red = RedBaron("[\n    1,\n]")
+    red = RedBaron("[\n   1,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.remove(comma_proxy_list[0])
     assert red.dumps() == "[]"
 
 
 def test_comma_proxy_list_indented_remove_2_at_top():
-    red = RedBaron("[\n    2,\n    1,\n]")
+    red = RedBaron("[\n   2,\n   1,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list.remove(comma_proxy_list[0])
-    assert red.dumps() == "[\n    1,\n]"
+    assert red.dumps() == "[\n   1,\n]"
 
 
 def test_comma_proxy_list_indented_set_item():
-    red = RedBaron("[\n    1,\n]")
+    red = RedBaron("[\n   1,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list[0] = "42"
     assert comma_proxy_list[0].type == "int"
@@ -986,27 +986,27 @@ def test_comma_proxy_list_indented_set_item():
     comma_proxy_list[0] = "plop"
     assert comma_proxy_list[0].type == "name"
     assert comma_proxy_list[0].value == "plop"
-    assert red.dumps() == "[\n    plop,\n]"
+    assert red.dumps() == "[\n   plop,\n]"
 
 
 def test_comma_proxy_list_indented_set_slice():
-    red = RedBaron("[\n    1,\n    2,\n    3,\n]")
+    red = RedBaron("[\n   1,\n   2,\n   3,\n]")
     comma_proxy_list = red[0].value
     comma_proxy_list[1:2] = ["42", "31", "23"]
-    assert red.dumps() == "[\n    1,\n    42,\n    31,\n    23,\n    3,\n]"
+    assert red.dumps() == "[\n   1,\n   42,\n   31,\n   23,\n   3,\n]"
 
 
 def test_comma_proxy_list_indented_delslice():
-    red = RedBaron("[\n    1,\n    2,\n    3,\n    4,\n    5,\n    6,\n]")
+    red = RedBaron("[\n   1,\n   2,\n   3,\n   4,\n   5,\n   6,\n]")
     comma_proxy_list = red[0].value
     del comma_proxy_list[1:4]
-    assert red.dumps() == "[\n    1,\n    5,\n    6,\n]"
+    assert red.dumps() == "[\n   1,\n   5,\n   6,\n]"
 
 
 comma_proxy_list_indented_code_to_test = """
 with stuff:
     a = [
-        1,
+      1,
     ]
 """
 
@@ -1014,8 +1014,8 @@ def test_comma_proxy_list_indented_delslice():
 comma_proxy_list_indented_code_to_test_expected_result = """
 with stuff:
     a = [
-        1,
-        2,
+      1,
+      2,
     ]
 """
 
@@ -1025,6 +1025,69 @@ def test_comma_proxy_list_indented_in_indentation_case():
     assert red.dumps() == comma_proxy_list_indented_code_to_test_expected_result
 
 
+def test_comma_proxy_list_indented_comment_before_end():
+    red = RedBaron("{\n   'foo': 'bar',\n   'baz': 'quux',\n   # foo\n}")
+    red[0].value.append("'quuz': 'quuz'")
+    assert red.dumps() == "{\n   'foo': 'bar',\n   'baz': 'quux',\n   'quuz': 'quuz',\n   # foo\n}"
+
+
+def test_comma_proxy_list_indented_inline_comment_before_end():
+    red = RedBaron("{\n   'foo': 'bar',\n   'baz': 'quux', # something about baz\n   # foo\n}")
+    red[0].value.append("'quuz': 'quuz'")
+    assert red.dumps() == "{\n   'foo': 'bar',\n   'baz': 'quux', # something about baz\n   'quuz': 'quuz',\n   # foo\n}"
+
+
+def test_comma_proxy_list_indented_inline_comment_before_end2():
+    red = RedBaron("{\n   'foo': 'bar',\n   'baz': 'quux', # something about baz\n   # foo\n}")
+    red[0].value.insert(1, "'quuz': 'quuz'")
+    assert red.dumps() == "{\n   'foo': 'bar',\n   'quuz': 'quuz',\n   'baz': 'quux', # something about baz\n   # foo\n}"
+
+
+def test_comma_proxy_list_indented_comment_before_end_no_trailing_comma():
+    red = RedBaron("{\n   'foo': 'bar',\n   'baz': 'quux'\n   # foo\n}")
+    red[0].value.append("'quuz': 'quuz'")
+    assert red.dumps() == "{\n   'foo': 'bar',\n   'baz': 'quux'\n   # foo\n   ,\n   'quuz': 'quuz'}"
+
+
+def test_comma_proxy_list_indented_comment_separate_end_of_line_end_of_list():
+    red = RedBaron("{\n   'foo': 'bar',\n   'baz': 'quux',  # baz\n   # foo\n}")
+    red[0].value.append("'quuz': 'quuz'")
+    assert red.dumps() == "{\n   'foo': 'bar',\n   'baz': 'quux',  # baz\n   'quuz': 'quuz',\n   # foo\n}"
+
+
+def test_comma_proxy_list_indented_infer_separator():
+    red = RedBaron("{\n   'baz': (\n      123),\n   'foo': 'bar',\n}")
+    red[0].value.append("'quuz': 'quuz'")
+    assert red.dumps() == "{\n   'baz': (\n      123),\n   'foo': 'bar',\n   'quuz': 'quuz',\n}"
+
+
+def test_comma_proxy_list_indented_comment_preserved_end_of_list_no_trailing_comma():
+    red = RedBaron("{\n   'foo': 'bar',\n   'baz': 'quux',  # baz\n   'asdf': 'blah'\n}")
+    del red[0].value[-1]
+    assert red.dumps() == "{\n   'foo': 'bar',\n   'baz': 'quux',  # baz\n}"
+
+
+# XXX If there's no trailing comma, we currently have no way to preserve end-of-list whitespace.
+#def test_comma_proxy_list_indented_end_of_list_no_trailing_comma():
+#    red = RedBaron("{\n   'foo': 'bar',\n   'baz': 'quux',\n   'quuz': 'asdf'\n}")
+#    del red[0].value[-1]
+#    assert red.dumps() == "{\n   'foo': 'bar',\n   'baz': 'quux'\n}"
+
+
+def test_comma_proxy_list_indented_comment_at_start():
+    red = RedBaron("{\n   # foo is my favorite\n   'foo': 'bar',\n   'baz': 'quux',\n}")
+    red[0].value.append("'quuz': 'quuz'")
+    assert red.dumps() == "{\n   # foo is my favorite\n   'foo': 'bar',\n   'baz': 'quux',\n   'quuz': 'quuz',\n}"
+
+
+def test_comma_proxy_list_strange_indentation():
+    red = RedBaron("def __init__(self, a,\n   b, c): pass\n")
+    red[0].arguments.pop()
+    assert red.dumps() == "def __init__(self, a,\n   b): pass\n"
+    red[0].arguments.pop()
+    assert red.dumps() == "def __init__(self, a): pass\n"
+
+
 def test_decorator_line_proxy_with_blank_line_list_len_empty():
     red = RedBaron("def a():\n    pass\n")
     assert len(red[0].decorators) == 0