From dd06cd953bff680aee8f92acfe70233f42c106aa Mon Sep 17 00:00:00 2001 From: Duncan Findlay Date: Fri, 14 Jul 2017 17:31:46 -0700 Subject: [PATCH] Improve and simplify the CommaProxyList algorithm. With this change CommaProxyList better handles and preserves non-standard (and even inconsistent) indentation, rather than assuming indented lists are indented by four spaces (fixes #100). It also is significantly faster in very large files by avoiding use of ".indentation", which can require iterating over every element in the file. End-of-list comments are preserved at the end of the list, and inline comments are also preserved. --- redbaron/base_nodes.py | 177 +++++++++++++++++++++++++++------------ tests/test_proxy_list.py | 131 +++++++++++++++++++++-------- 2 files changed, 221 insertions(+), 87 deletions(-) diff --git a/redbaron/base_nodes.py b/redbaron/base_nodes.py index ff63540b..cb51d070 100644 --- a/redbaron/base_nodes.py +++ b/redbaron/base_nodes.py @@ -1490,24 +1490,64 @@ def __init__(self, node_list, on_attribute="value"): super(CommaProxyList, self).__init__(node_list, on_attribute=on_attribute) self.style = "indented" if any(self.node_list('comma', recursive=False).map(lambda x: x('endl'))) else "flat" - # XXX will likely break if the user modify the formatting of the list, - # I don't like that - self.has_trailing = self.node_list and self.node_list[-1].type == "comma" + def _infer_middle_indentation(self): + """Determines how far to indent nodes for an indented list.""" + # Find a non-trailing comma node, and use the existing indentation. This + # only works if we have a list with more than 2 elements. + for node in self.node_list[:-1]: + if node.type != "comma": + continue + endls = node.find_all("endl") + if endls: + return endls[-1].indent + # Otherwise, take the first element's indent level. This can be + # expensive. + if self.node_list: + return self.node_list[0].indentation + # Empty list, so just assume 4 spaces from parent's indent. + return self.parent.indentation + " " + + def _get_middle_separator(self, indent=None): + """Builds a new middle separator (comma). + + If indentation is provided, the comma is followed by a newline with the + given indentation. Otherwise, it's a single space. + """ + if indent is not None: + return redbaron.nodes.CommaNode( + {"type": "comma", "first_formatting": [], + "second_formatting": [{ + "type": "endl", + "indent": indent, + "formatting": [], "value": "\n"}]}) + else: + return redbaron.nodes.CommaNode( + {"type": "comma", "first_formatting": [], + "second_formatting": [{"type": "space", "value": " "}]}) - def _get_middle_separator(self): - if self.style == "indented": - return redbaron.nodes.CommaNode({"type": "comma", "first_formatting": [], "second_formatting": [ - {"type": "endl", "indent": self.parent.indentation + " ", "formatting": [], "value": "\n"}]}) + def _split_formatting(self, node_list): + """Splits a list of formatting nodes on the first endl. - return redbaron.nodes.CommaNode( - {"type": "comma", "first_formatting": [], "second_formatting": [{"type": "space", "value": " "}]}) + This is useful for determining which formatting nodes logically belong + with the following element. + """ + endl_found = -1 + for i, fnode in enumerate(node_list): + if fnode.type == "endl": + endl_found = i + break + + # Anything *after* the first endl is associated with the end of the + # list. + if endl_found >= 0: + return (node_list[:i+1], node_list[i+1:]) + return node_list, [] def _generate_expected_list(self): - def generate_separator(): - separator = self._get_middle_separator() - separator.parent = self.node_list - separator.on_attribute = self.on_attribute - return separator + """Converts self.data into a list of nodes.""" + + middle_indentation = (self._infer_middle_indentation() + if self.style == "indented" else None) # XXX will break comments if not self.data: @@ -1515,48 +1555,79 @@ def generate_separator(): self.parent.second_formatting = [] return [] - expected_list = [] + trailing_indent = None + end_of_list_formatting = None + has_trailing_comma = False + + # The basic idea here is that every element of the list is stored in + # self.data, along with the CommaNode that follows it (if + # applicable). We have to turn this into a flat list of nodes, and fix + # the formatting of the CommaNodes so that the list looks reasonable. + # + # One of the elements of self.data may have a CommaNode that was + # previously the last element of the list. This will have different + # formatting than a CommaNode elsewhere in the list. + # + # To make things simpler, we start by converting this trailing CommaNode + # into a standard middle-separator (if it exists), by pulling it from + # self.node_list. + if self.node_list and self.node_list[-1].type == "comma": + old_trailing_comma = self.node_list[-1] + # Keep comments associated with the end of list for later. + keep, end_of_list_formatting = self._split_formatting( + old_trailing_comma.second_formatting) + if keep[-1].type == "endl": + # Change the indent level. + trailing_indent = keep[-1].indent + if middle_indentation: + keep[-1].indent = middle_indentation + old_trailing_comma.second_formatting = keep + has_trailing_comma = True + elif self.node_list: + # We may still need to know the trailing indent, in case we add a + # trailing comma later. + endls = self.node_list[-1].find_all("endl") + if endls: + trailing_indent = endls[-1].indent - for position, i in enumerate(self.data): - is_last = position == len(self.data) - 1 - expected_list.append(i[0]) - # XXX this will need refactoring... - if i[1] is not None: - # here we encounter a middle value that should have formatting - # to separate between the intems but has not so we add it - # this happen because a new value has been added after this one - if not is_last and not i[1]: - expected_list.append(generate_separator()) - - # comma list doesn't have trailing but has a comma at its end, remove it - elif is_last and not self.has_trailing and i[1] and i[1][0].type == "comma": - # XXX this will likely break comments if presents at the end of the list - pass - else: - expected_list += i[1] + expected_list = [] - # XXX will break comments - if self.style == "indented": - if not expected_list[-1].second_formatting.endl: - raise Exception( - "It appears that you have indentation in your CommaList, for now RedBaron doesn't know how to handle this situation (which requires a lot of work), sorry about that. You can find more information here https://github.com/PyCQA/redbaron/issues/100") - elif expected_list[-1].second_formatting.endl.indent != self.parent.indentation + " " * 4: - expected_list[-1].second_formatting.endl.indent = self.parent.indentation + " " * 4 + # Now, iterate through self.data and add things to our node list. Every + # element either has an associated middle-separator, or no associated + # formatting. + for position, (elem, formatting) in enumerate(self.data): + if formatting: + expected_list.append(elem) + expected_list.extend(formatting) else: - # here we generate the new expected formatting - # None is used as a sentry value for newly inserted values in the proxy list - if not is_last: - expected_list.append(generate_separator()) - elif self.has_trailing: - expected_list.append(generate_separator()) - expected_list[-1].second_formatting[0].indent = "" - - if expected_list and self.has_trailing and self.style == "indented": - if not expected_list[-1].second_formatting.endl: - raise Exception( - "It appears that you have indentation in your CommaList, for now RedBaron doesn't know how to handle this situation (which requires a lot of work), sorry about that. You can find more information here https://github.com/PyCQA/redbaron/issues/100") - elif expected_list[-1].second_formatting.endl.indent != self.parent.indentation: - expected_list[-1].second_formatting.endl.indent = self.parent.indentation + if middle_indentation is not None: + # Fix indentation, if applicable. + endls = elem.find_all("endl") + if endls: + endls[-1].indent = middle_indentation + + separator = self._get_middle_separator(middle_indentation) + separator.parent = self.node_list + separator.on_attribute = self.on_attribute + expected_list.append(elem) + expected_list.append(separator) + + # Ok, now adjust the last element of the list, which contains a middle + # separator. We want to change it to be a trailing separator. + if expected_list and expected_list[-1].type == "comma": + trailing_comma = expected_list[-1] + if not has_trailing_comma and not trailing_comma.find("comment"): + # We don't want a trailing comma. + del expected_list[-1] + trailing_comma = None + + if trailing_comma: + endls = trailing_comma.find_all("endl") + if endls and trailing_indent is not None: + endls[-1].indent = trailing_indent + if end_of_list_formatting: + trailing_comma.second_formatting.extend( + end_of_list_formatting) return expected_list diff --git a/tests/test_proxy_list.py b/tests/test_proxy_list.py index 23bd170a..a6ea765b 100644 --- a/tests/test_proxy_list.py +++ b/tests/test_proxy_list.py @@ -1,5 +1,4 @@ #!/usr/bin/python -# -*- coding:Utf-8 -*- """ Tests the rendering feature """ @@ -847,7 +846,8 @@ def test_regression_help_proxy_list(): red = RedBaron("(1, 2)") red[0].value.node_list.help() - +# Many of these tests have test code using 3 spaces for indenting to make sure +# we're not relying on a hard-coded assumption of 4 spaces somewhere. def test_comma_proxy_list_indented_len_not_empty(): red = RedBaron("[\n 1,\n 2,\n 3,\n]") comma_proxy_list = red[0].value @@ -877,25 +877,25 @@ def test_comma_proxy_list_indented_detect_style(): def test_comma_proxy_list_indented_insert_2_at_top(): - red = RedBaron("[\n 1,\n]") + red = RedBaron("[\n 1,\n]") comma_proxy_list = red[0].value comma_proxy_list.insert(0, "2") - assert red.dumps() == "[\n 2,\n 1,\n]" + assert red.dumps() == "[\n 2,\n 1,\n]" def test_comma_proxy_list_indented_insert_2(): - red = RedBaron("[\n 1,\n]") + red = RedBaron("[\n 1,\n]") comma_proxy_list = red[0].value comma_proxy_list.insert(1, "2") assert comma_proxy_list.style == "indented" - assert red.dumps() == "[\n 1,\n 2,\n]" + assert red.dumps() == "[\n 1,\n 2,\n]" def test_comma_proxy_list_indented_insert_2_middle(): - red = RedBaron("[\n 1,\n 3,\n]") + red = RedBaron("[\n 1,\n 3,\n]") comma_proxy_list = red[0].value comma_proxy_list.insert(1, "2") - assert red.dumps() == "[\n 1,\n 2,\n 3,\n]" + assert red.dumps() == "[\n 1,\n 2,\n 3,\n]" # XXX need to rethink this behavior @@ -908,77 +908,77 @@ def test_comma_proxy_list_indented_insert_2_middle(): def test_comma_proxy_list_indented_append_2(): - red = RedBaron("[\n 1,\n]") + red = RedBaron("[\n 1,\n]") comma_proxy_list = red[0].value comma_proxy_list.append("2") - assert red.dumps() == "[\n 1,\n 2,\n]" + assert red.dumps() == "[\n 1,\n 2,\n]" def test_comma_proxy_list_indented_pop(): - red = RedBaron("[\n 1,\n]") + red = RedBaron("[\n 1,\n]") comma_proxy_list = red[0].value comma_proxy_list.pop(0) assert red.dumps() == "[]" def test_comma_proxy_list_indented_pop_2_at_top(): - red = RedBaron("[\n 2,\n 1,\n]") + red = RedBaron("[\n 2,\n 1,\n]") comma_proxy_list = red[0].value comma_proxy_list.pop(0) - assert red.dumps() == "[\n 1,\n]" + assert red.dumps() == "[\n 1,\n]" def test_comma_proxy_list_indented_pop_2(): - red = RedBaron("[\n 1,\n 2,\n]") + red = RedBaron("[\n 1,\n 2,\n]") comma_proxy_list = red[0].value comma_proxy_list.pop(1) - assert red.dumps() == "[\n 1,\n]" + assert red.dumps() == "[\n 1,\n]" def test_comma_proxy_list_indented_pop_2_middle(): - red = RedBaron("[\n 1,\n 2,\n 3,\n]") + red = RedBaron("[\n 1,\n 2,\n 3,\n]") comma_proxy_list = red[0].value comma_proxy_list.pop(1) - assert red.dumps() == "[\n 1,\n 3,\n]" + assert red.dumps() == "[\n 1,\n 3,\n]" def test_comma_proxy_list_indented_pop_no_index(): - red = RedBaron("[\n 1,\n 2,\n 3,\n]") + red = RedBaron("[\n 1,\n 2,\n 3,\n]") comma_proxy_list = red[0].value comma_proxy_list.pop() - assert red.dumps() == "[\n 1,\n 2,\n]" + assert red.dumps() == "[\n 1,\n 2,\n]" def test_comma_proxy_list_indented_del(): - red = RedBaron("[\n 1,\n]") + red = RedBaron("[\n 1,\n]") comma_proxy_list = red[0].value del comma_proxy_list[0] assert red.dumps() == "[]" def test_comma_proxy_list_indented_del_2_at_top(): - red = RedBaron("[\n 2,\n 1,\n]") + red = RedBaron("[\n 2,\n 1,\n]") comma_proxy_list = red[0].value del comma_proxy_list[0] - assert red.dumps() == "[\n 1,\n]" + assert red.dumps() == "[\n 1,\n]" def test_comma_proxy_list_indented_remove(): - red = RedBaron("[\n 1,\n]") + red = RedBaron("[\n 1,\n]") comma_proxy_list = red[0].value comma_proxy_list.remove(comma_proxy_list[0]) assert red.dumps() == "[]" def test_comma_proxy_list_indented_remove_2_at_top(): - red = RedBaron("[\n 2,\n 1,\n]") + red = RedBaron("[\n 2,\n 1,\n]") comma_proxy_list = red[0].value comma_proxy_list.remove(comma_proxy_list[0]) - assert red.dumps() == "[\n 1,\n]" + assert red.dumps() == "[\n 1,\n]" def test_comma_proxy_list_indented_set_item(): - red = RedBaron("[\n 1,\n]") + red = RedBaron("[\n 1,\n]") comma_proxy_list = red[0].value comma_proxy_list[0] = "42" assert comma_proxy_list[0].type == "int" @@ -986,27 +986,27 @@ def test_comma_proxy_list_indented_set_item(): comma_proxy_list[0] = "plop" assert comma_proxy_list[0].type == "name" assert comma_proxy_list[0].value == "plop" - assert red.dumps() == "[\n plop,\n]" + assert red.dumps() == "[\n plop,\n]" def test_comma_proxy_list_indented_set_slice(): - red = RedBaron("[\n 1,\n 2,\n 3,\n]") + red = RedBaron("[\n 1,\n 2,\n 3,\n]") comma_proxy_list = red[0].value comma_proxy_list[1:2] = ["42", "31", "23"] - assert red.dumps() == "[\n 1,\n 42,\n 31,\n 23,\n 3,\n]" + assert red.dumps() == "[\n 1,\n 42,\n 31,\n 23,\n 3,\n]" def test_comma_proxy_list_indented_delslice(): - red = RedBaron("[\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n]") + red = RedBaron("[\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n]") comma_proxy_list = red[0].value del comma_proxy_list[1:4] - assert red.dumps() == "[\n 1,\n 5,\n 6,\n]" + assert red.dumps() == "[\n 1,\n 5,\n 6,\n]" comma_proxy_list_indented_code_to_test = """ with stuff: a = [ - 1, + 1, ] """ @@ -1014,8 +1014,8 @@ def test_comma_proxy_list_indented_delslice(): comma_proxy_list_indented_code_to_test_expected_result = """ with stuff: a = [ - 1, - 2, + 1, + 2, ] """ @@ -1025,6 +1025,69 @@ def test_comma_proxy_list_indented_in_indentation_case(): assert red.dumps() == comma_proxy_list_indented_code_to_test_expected_result +def test_comma_proxy_list_indented_comment_before_end(): + red = RedBaron("{\n 'foo': 'bar',\n 'baz': 'quux',\n # foo\n}") + red[0].value.append("'quuz': 'quuz'") + assert red.dumps() == "{\n 'foo': 'bar',\n 'baz': 'quux',\n 'quuz': 'quuz',\n # foo\n}" + + +def test_comma_proxy_list_indented_inline_comment_before_end(): + red = RedBaron("{\n 'foo': 'bar',\n 'baz': 'quux', # something about baz\n # foo\n}") + red[0].value.append("'quuz': 'quuz'") + assert red.dumps() == "{\n 'foo': 'bar',\n 'baz': 'quux', # something about baz\n 'quuz': 'quuz',\n # foo\n}" + + +def test_comma_proxy_list_indented_inline_comment_before_end2(): + red = RedBaron("{\n 'foo': 'bar',\n 'baz': 'quux', # something about baz\n # foo\n}") + red[0].value.insert(1, "'quuz': 'quuz'") + assert red.dumps() == "{\n 'foo': 'bar',\n 'quuz': 'quuz',\n 'baz': 'quux', # something about baz\n # foo\n}" + + +def test_comma_proxy_list_indented_comment_before_end_no_trailing_comma(): + red = RedBaron("{\n 'foo': 'bar',\n 'baz': 'quux'\n # foo\n}") + red[0].value.append("'quuz': 'quuz'") + assert red.dumps() == "{\n 'foo': 'bar',\n 'baz': 'quux'\n # foo\n ,\n 'quuz': 'quuz'}" + + +def test_comma_proxy_list_indented_comment_separate_end_of_line_end_of_list(): + red = RedBaron("{\n 'foo': 'bar',\n 'baz': 'quux', # baz\n # foo\n}") + red[0].value.append("'quuz': 'quuz'") + assert red.dumps() == "{\n 'foo': 'bar',\n 'baz': 'quux', # baz\n 'quuz': 'quuz',\n # foo\n}" + + +def test_comma_proxy_list_indented_infer_separator(): + red = RedBaron("{\n 'baz': (\n 123),\n 'foo': 'bar',\n}") + red[0].value.append("'quuz': 'quuz'") + assert red.dumps() == "{\n 'baz': (\n 123),\n 'foo': 'bar',\n 'quuz': 'quuz',\n}" + + +def test_comma_proxy_list_indented_comment_preserved_end_of_list_no_trailing_comma(): + red = RedBaron("{\n 'foo': 'bar',\n 'baz': 'quux', # baz\n 'asdf': 'blah'\n}") + del red[0].value[-1] + assert red.dumps() == "{\n 'foo': 'bar',\n 'baz': 'quux', # baz\n}" + + +# XXX If there's no trailing comma, we currently have no way to preserve end-of-list whitespace. +#def test_comma_proxy_list_indented_end_of_list_no_trailing_comma(): +# red = RedBaron("{\n 'foo': 'bar',\n 'baz': 'quux',\n 'quuz': 'asdf'\n}") +# del red[0].value[-1] +# assert red.dumps() == "{\n 'foo': 'bar',\n 'baz': 'quux'\n}" + + +def test_comma_proxy_list_indented_comment_at_start(): + red = RedBaron("{\n # foo is my favorite\n 'foo': 'bar',\n 'baz': 'quux',\n}") + red[0].value.append("'quuz': 'quuz'") + assert red.dumps() == "{\n # foo is my favorite\n 'foo': 'bar',\n 'baz': 'quux',\n 'quuz': 'quuz',\n}" + + +def test_comma_proxy_list_strange_indentation(): + red = RedBaron("def __init__(self, a,\n b, c): pass\n") + red[0].arguments.pop() + assert red.dumps() == "def __init__(self, a,\n b): pass\n" + red[0].arguments.pop() + assert red.dumps() == "def __init__(self, a): pass\n" + + def test_decorator_line_proxy_with_blank_line_list_len_empty(): red = RedBaron("def a():\n pass\n") assert len(red[0].decorators) == 0