diff --git a/CHANGELOG.md b/CHANGELOG.md index d86bb9d6..79a96ad2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file. ### Formatting Changes + Bug Fixes +- sqlfmt no longer merges lines that contiain comments, in order to preserve the positional meaning of those comments ([#348](https://github.com/tconbeer/sqlfmt/issues/348) - thank you, [@rileyschack](https://github.com/rileyschack) and [@IanEdington](https://github.com/IanEdington)!). - sqlfmt no longer merges together lines containing multiline jinja blocks unless those lines start with an operator or comma ([#365](https://github.com/tconbeer/sqlfmt/issues/365) - thank you, [@gavlt](https://github.com/gavlt)!). - fixed a bug where adding a jinja end tag (e.g., `{% endif %}`) to a line could cause bad formatting of everything on that line diff --git a/src/sqlfmt/comment.py b/src/sqlfmt/comment.py index 698b72d1..72752975 100644 --- a/src/sqlfmt/comment.py +++ b/src/sqlfmt/comment.py @@ -2,7 +2,6 @@ from dataclasses import dataclass from typing import ClassVar, Iterator, Tuple -from sqlfmt.exception import InlineCommentException from sqlfmt.token import Token @@ -68,20 +67,16 @@ def is_multiline(self) -> bool: """ return "\n" in self.token.token - def render_inline(self, max_length: int, content_length: int) -> str: + @property + def was_parsed_inline(self) -> bool: + return not self.is_standalone and not self.is_multiline + + def render_inline(self) -> str: """ - For a Comment, returns the string for properly formatting this Comment - inline, after content_length characters of non-comment Nodes + Renders a comment as an inline comment, assuming it'll fit. """ - if self.is_standalone: - raise InlineCommentException("Can't inline standalone comment") - else: - inline_prefix = " " * 2 - rendered = inline_prefix + str(self) - if content_length + len(rendered) > max_length: - raise InlineCommentException("Comment too long to be inlined") - else: - return inline_prefix + str(self) + inline_prefix = " " * 2 + return f"{inline_prefix}{self}" def render_standalone(self, max_length: int, prefix: str) -> str: """ diff --git a/src/sqlfmt/exception.py b/src/sqlfmt/exception.py index c39c6cf0..d192ceef 100644 --- a/src/sqlfmt/exception.py +++ b/src/sqlfmt/exception.py @@ -64,15 +64,6 @@ class SqlfmtControlFlowException(Exception): pass -class InlineCommentException(SqlfmtControlFlowException): - """ - Raised by the Line class if we try to render a comment - inline that is too long - """ - - pass - - class StopRulesetLexing(SqlfmtControlFlowException): """ Raised by the Analyzer or one of its actions to indicate diff --git a/src/sqlfmt/jinjafmt.py b/src/sqlfmt/jinjafmt.py index b7f2fc74..5da52d68 100644 --- a/src/sqlfmt/jinjafmt.py +++ b/src/sqlfmt/jinjafmt.py @@ -367,9 +367,11 @@ def format_line(self, line: Line) -> List[Line]: chain( *[ self.format_line(new_line) - for new_line in [ + for new_line, _ in [ splitter.split_at_index(line, 0, i, line.comments), - splitter.split_at_index(line, i, -1, []), + splitter.split_at_index( + line, i, -1, [], is_tail=True + ), ] ] ) diff --git a/src/sqlfmt/line.py b/src/sqlfmt/line.py index 0344dd82..14980eec 100644 --- a/src/sqlfmt/line.py +++ b/src/sqlfmt/line.py @@ -2,7 +2,6 @@ from typing import List, Optional, Tuple from sqlfmt.comment import Comment -from sqlfmt.exception import InlineCommentException from sqlfmt.node import Node from sqlfmt.token import Token @@ -88,39 +87,49 @@ def prefix(self) -> str: prefix = INDENT * self.depth[0] return prefix + def has_inline_comment(self, max_length: int) -> bool: + """ + Returns true if the line has an attached comment that was parsed + as an inline comment, and if the line and comment are short + enough to be rendered inline. Returns false otherwise + """ + content = str(self).rstrip() + if self.comments and self.comments[-1].was_parsed_inline: + inline_render = self.comments[-1].render_inline() + if len(content) + len(inline_render) <= max_length: + return True + return False + def render_with_comments(self, max_length: int) -> str: """ Returns a string that represents the properly-formatted Line, including associated comments """ - content = str(self) - rendered = content - if len(self.comments) == 1: - # standalone or multiline comment - if self.nodes[0].is_newline: - rendered = f"{self.prefix}{self.comments[0]}" - # inline comment + content = str(self).rstrip() + rendered_lines: List[str] = [] + for comment in self.comments: + if comment.is_multiline or comment.is_standalone: + rendered_lines.append( + comment.render_standalone(max_length=max_length, prefix=self.prefix) + ) else: - try: - comment = self.comments[0].render_inline( - max_length=max_length, content_length=len(content.rstrip()) - ) - rendered = f"{content.rstrip()}{comment}" - except InlineCommentException: - comment = self.comments[0].render_standalone( - max_length=max_length, prefix=self.prefix - ) - rendered = f"{comment}{content}" - # wrap comments above; note that str(comment) is newline-terminated - elif len(self.comments) > 1: - comment = "".join( - [ - c.render_standalone(max_length=max_length, prefix=self.prefix) - for c in self.comments - ] + # comment is potentially an inline comment, and we'll handle + # that below. Inline comments must be the last comment + pass + + if self.has_inline_comment(max_length=max_length): + rendered_lines.append(f"{content}{self.comments[-1].render_inline()}") + elif self.comments and self.comments[-1].was_parsed_inline: + # comment was parsed inline but needs to be written standalone + rendered_lines.append( + self.comments[-1].render_standalone( + max_length=max_length, prefix=self.prefix + ) ) - rendered = f"{comment}{content}" - return rendered + rendered_lines.append(f"{self}") + else: + rendered_lines.append(f"{self}") + return "".join(rendered_lines) @classmethod def from_nodes( diff --git a/src/sqlfmt/merger.py b/src/sqlfmt/merger.py index b789d934..4ac9d90e 100644 --- a/src/sqlfmt/merger.py +++ b/src/sqlfmt/merger.py @@ -50,9 +50,8 @@ def safe_create_merged_line(self, lines: List[Line]) -> List[Line]: except CannotMergeException: return lines - @classmethod def _extract_components( - cls, lines: Iterable[Line] + self, lines: Iterable[Line] ) -> Tuple[List[Node], List[Comment]]: """ Given a list of lines, return 2 components: @@ -67,7 +66,27 @@ def _extract_components( final_newline: Optional[Node] = None allow_multiline_jinja = True has_multiline_jinja = False + has_inline_comment_above = False for line in lines: + # only merge lines with comments if it's a standalone comment + # above the first line + if line.comments: + if nodes or has_inline_comment_above: + raise CannotMergeException( + "Can't merge lines with comments unless the comments " + "are above the first line" + ) + elif line.has_inline_comment(self.mode.line_length): + has_inline_comment_above = True + # make an exception for inline comments followed by + # a lonely comma (e.g., leading commas with inline comments) + elif has_inline_comment_above: + if not line.is_standalone_comma: + raise CannotMergeException( + "Can't merge lines with inline comments unless " + "the following line is a single standalone comma" + ) + if has_multiline_jinja and not ( line.starts_with_operator or line.starts_with_comma ): @@ -76,7 +95,7 @@ def _extract_components( ) # skip over newline nodes content_nodes = [ - cls._raise_unmergeable(node, allow_multiline_jinja) + self._raise_unmergeable(node, allow_multiline_jinja) for node in line.nodes if not node.is_newline ] diff --git a/src/sqlfmt/splitter.py b/src/sqlfmt/splitter.py index db41f684..417f6728 100644 --- a/src/sqlfmt/splitter.py +++ b/src/sqlfmt/splitter.py @@ -32,7 +32,13 @@ def maybe_split(self, line: Line) -> List[Line]: if head == 0: new_lines.append(line) else: - new_lines.append(self.split_at_index(line, head, i, comments)) + new_line, comments = self.split_at_index( + line, head, i, comments, is_tail=True + ) + assert ( + not comments + ), "Comments must be empty here or we'll drop them" + new_lines.append(new_line) return new_lines elif ( i > head @@ -40,9 +46,8 @@ def maybe_split(self, line: Line) -> List[Line]: and not node.formatting_disabled and (always_split_after or self.maybe_split_before(node)) ): - new_line = self.split_at_index(line, head, i, comments) + new_line, comments = self.split_at_index(line, head, i, comments) new_lines.append(new_line) - comments = [] # only first split gets original comments head = i # node now follows a new newline node, so we need to update # its previous node (this can impact its depth) @@ -50,7 +55,9 @@ def maybe_split(self, line: Line) -> List[Line]: always_split_after, never_split_after = self.maybe_split_after(node) - new_lines.append(self.split_at_index(line, head, -1, comments)) + new_line, comments = self.split_at_index(line, head, -1, comments, is_tail=True) + assert not comments, "Comments must be empty here or we'll drop them" + new_lines.append(new_line) return new_lines def maybe_split_before(self, node: Node) -> bool: @@ -118,10 +125,19 @@ def maybe_split_after(self, node: Node) -> Tuple[bool, bool]: return False, False def split_at_index( - self, line: Line, head: int, index: int, comments: List[Comment] - ) -> Line: + self, + line: Line, + head: int, + index: int, + comments: List[Comment], + is_tail: bool = False, + ) -> Tuple[Line, List[Comment]]: """ - Return a new line comprised of the nodes line[head:index], plus a newline node + Return a new line comprised of the nodes line[head:index], plus a newline node. + + Also return a list of comments that need to be included in the tail + of the split line. This includes inline comments and if the head + is just a comma, all comments. """ if index == -1: new_nodes = line.nodes[head:] @@ -129,12 +145,26 @@ def split_at_index( assert index > head, "Cannot split at start of line!" new_nodes = line.nodes[head:index] + assert new_nodes, "Cannot split a line without nodes!" + + if is_tail: + head_comments, tail_comments = comments, [] + elif comments: + if new_nodes[0].is_comma: + head_comments, tail_comments = [], comments + elif not comments[-1].is_standalone: + head_comments, tail_comments = comments[:-1], [comments[-1]] + else: + head_comments, tail_comments = comments, [] + else: + head_comments, tail_comments = [], [] + new_line = Line.from_nodes( previous_node=new_nodes[0].previous_node, nodes=new_nodes, - comments=comments, + comments=head_comments, ) if not new_line.nodes[-1].is_newline: self.node_manager.append_newline(new_line) - return new_line + return new_line, tail_comments diff --git a/tests/data/unformatted/101_multiline.sql b/tests/data/unformatted/101_multiline.sql index a6e29d3d..13ca7687 100644 --- a/tests/data/unformatted/101_multiline.sql +++ b/tests/data/unformatted/101_multiline.sql @@ -66,7 +66,7 @@ with # And this is a nice multiline jinja comment # that we will also handle. #} -/* what!?! */ select * from renamed -where true +where + true /* what!?! */ diff --git a/tests/data/unformatted/102_lots_of_comments.sql b/tests/data/unformatted/102_lots_of_comments.sql index 29e06bfe..f036886c 100644 --- a/tests/data/unformatted/102_lots_of_comments.sql +++ b/tests/data/unformatted/102_lots_of_comments.sql @@ -29,7 +29,7 @@ select -- not distinct another_thing_entirely, yet_another_field -- another standalone comment -from a_really_long_table; -- with a super long comment that won't fit here and needs to move up +from a_really_long_table; -- with a super long comment that won't fit here and needs to move up above the semicolon -- sometimes we like really long comments -- that wrap to many lines. And may even @@ -39,10 +39,17 @@ from a_really_long_table; -- with a super long comment that won't fit here and n select 1 )))))__SQLFMT_OUTPUT__((((( with - one as (select 1), -- short - -- too long to be one-lined with the comment inside so it'll have to go above - two as (select a_long_enough_field, another_long_enough_field), - three as (select 1), -- short enough + one as ( + select -- short + 1 + ), + two as ( + -- too long to be one-lined with the comment inside so it'll have to go above + select a_long_enough_field, another_long_enough_field + ), + three as ( + select 1 + ), -- short enough four as ( -- too long to be one-lined with the comment inside so it'll have to go above -- and be split and wrapped @@ -55,14 +62,14 @@ select -- not distinct -- with a long comment that needs to wrap above this line one_really_long_field_name, -- a standalone comment - -- with another comment - a_short_field, + a_short_field, -- with another comment something_else, another_thing_entirely, yet_another_field -- another standalone comment --- with a super long comment that won't fit here and needs to move up from a_really_long_table +-- with a super long comment that won't fit here and needs to move up above the +-- semicolon ; -- sometimes we like really long comments diff --git a/tests/data/unformatted/113_utils_group_by.sql b/tests/data/unformatted/113_utils_group_by.sql index a92b2f4a..a040b4b9 100644 --- a/tests/data/unformatted/113_utils_group_by.sql +++ b/tests/data/unformatted/113_utils_group_by.sql @@ -26,4 +26,6 @@ select field_a, count(*) from my_table -where something_is_true {{ dbt_utils.group_by(10) }} -- todo: keep line break +where + something_is_true + {{ dbt_utils.group_by(10) }} -- todo: keep line break diff --git a/tests/data/unformatted/127_more_comments.sql b/tests/data/unformatted/127_more_comments.sql new file mode 100644 index 00000000..56d64a41 --- /dev/null +++ b/tests/data/unformatted/127_more_comments.sql @@ -0,0 +1,52 @@ +-- source: https://github.com/tconbeer/sqlfmt/issues/348 +with table_a as ( + select + /* Notice that this select statement can fit on a single line without comments */ + col1, + col2, -- col2 + /* Special column */ + special_column, + from {{ ref("table_a" )}} +) + +/* Some interesting comments above a CTE with a leading comma */ +, table_b as ( + select * + from {{ ref("table_b") }} +) + +select * +from table_a, table_b; + +select 1 +-- two +, 2 -- two inline +-- three +, 3 -- three inline +, 4 -- four inline +)))))__SQLFMT_OUTPUT__((((( +-- source: https://github.com/tconbeer/sqlfmt/issues/348 +with + table_a as ( + select + /* Notice that this select statement can fit on a single line without comments */ + col1, + col2, -- col2 + /* Special column */ + special_column, + from {{ ref("table_a") }} + ), + /* Some interesting comments above a CTE with a leading comma */ + table_b as (select * from {{ ref("table_b") }}) + +select * +from table_a, table_b +; + +select + 1, + -- two + 2, -- two inline + -- three + 3, -- three inline + 4 -- four inline diff --git a/tests/data/unformatted/200_base_model.sql b/tests/data/unformatted/200_base_model.sql index 073b8fd2..b893b058 100644 --- a/tests/data/unformatted/200_base_model.sql +++ b/tests/data/unformatted/200_base_model.sql @@ -61,8 +61,10 @@ with then nullif(full_name, '') when regexp_count(nullif(full_name, ''), ' ') = 1 then split_part(nullif(full_name, ''), ' ', 1) - -- let's explain what is going on here - else regexp_substr(nullif(full_name, ''), '.* .* ') + else + regexp_substr( + nullif(full_name, ''), '.* .* ' + ) -- let's explain what is going on here end ), 'TEST_USER' @@ -78,9 +80,13 @@ with from source - -- a very long comment about why we would exclude this user from this table - -- that we will wrap - where nvl(is_deleted, false) is false and id <> 123456 + where + + nvl(is_deleted, false) is false + and id + -- a very long comment about why we would exclude this user from this + -- table that we will wrap + <> 123456 ) select * diff --git a/tests/data/unformatted/209_rittman_int_web_events_sessionized.sql b/tests/data/unformatted/209_rittman_int_web_events_sessionized.sql index c144f1f9..3f7cb058 100644 --- a/tests/data/unformatted/209_rittman_int_web_events_sessionized.sql +++ b/tests/data/unformatted/209_rittman_int_web_events_sessionized.sql @@ -185,6 +185,8 @@ from ordered_conversion_tagged {% if var("product_warehouse_event_sources") %} with + events as ( + select * from {{ ref("int_web_events") }} /* {% if is_incremental() %} where visitor_id in ( select distinct visitor_id @@ -200,7 +202,7 @@ with ) {% endif %} */ - events as (select * from {{ ref("int_web_events") }}), + ), numbered as ( diff --git a/tests/data/unit_tests/test_splitter/test_simple_comment_split.sql b/tests/data/unit_tests/test_splitter/test_simple_comment_split.sql index e1bd8615..9283cdc9 100644 --- a/tests/data/unit_tests/test_splitter/test_simple_comment_split.sql +++ b/tests/data/unit_tests/test_splitter/test_simple_comment_split.sql @@ -1,6 +1,7 @@ select -- not distinct, just an ordinary select here, no big deal at all, it's okay really my_first_field + an_operation, -- here is a long comment to be wrapped above this line my_second_field, -- a short comment + -- standalone for third my_third_field + another_long_operation -- here is another long comment to be wrapped but not indented from my_really_long_data_source -- another comment that is a little bit too long to stay here where -- this should stay diff --git a/tests/functional_tests/test_general_formatting.py b/tests/functional_tests/test_general_formatting.py index 53ae9853..b9cc7480 100644 --- a/tests/functional_tests/test_general_formatting.py +++ b/tests/functional_tests/test_general_formatting.py @@ -40,6 +40,7 @@ "unformatted/124_bq_compound_types.sql", "unformatted/125_numeric_constants.sql", "unformatted/126_blank_lines.sql", + "unformatted/127_more_comments.sql", "unformatted/200_base_model.sql", "unformatted/201_basic_snapshot.sql", "unformatted/202_unpivot_macro.sql", diff --git a/tests/unit_tests/test_comment.py b/tests/unit_tests/test_comment.py index d62c0bb4..358a27a4 100644 --- a/tests/unit_tests/test_comment.py +++ b/tests/unit_tests/test_comment.py @@ -3,7 +3,6 @@ import pytest from sqlfmt.comment import Comment -from sqlfmt.exception import InlineCommentException from sqlfmt.token import Token, TokenType @@ -75,15 +74,8 @@ def test_render_inline( short_comment: Comment, nospace_comment: Comment, standalone_comment: Comment ) -> None: expected = " -- short comment\n" - assert short_comment.render_inline(max_length=88, content_length=20) == expected - assert nospace_comment.render_inline(max_length=88, content_length=20) == expected - with pytest.raises(InlineCommentException): - # can't inline a standalone comment - assert standalone_comment.render_inline(max_length=88, content_length=20) - - with pytest.raises(InlineCommentException): - # can't inline if the content is too long - assert short_comment.render_inline(max_length=88, content_length=80) + assert short_comment.render_inline() == expected + assert nospace_comment.render_inline() == expected @pytest.mark.parametrize( diff --git a/tests/unit_tests/test_line.py b/tests/unit_tests/test_line.py index fa5079d2..9a4581e4 100644 --- a/tests/unit_tests/test_line.py +++ b/tests/unit_tests/test_line.py @@ -210,11 +210,6 @@ def test_comment_rendering( ) inline_comment = Comment(token=comment_token, is_standalone=False) - node_manager.append_newline(bare_line) - bare_line.comments = [inline_comment] - expected_bare_render = normalized_comment + "\n" - assert bare_line.render_with_comments(88) == expected_bare_render - simple_line.comments = [inline_comment] expected_inline_render = ( str(simple_line).rstrip() + " " + normalized_comment + "\n" @@ -228,18 +223,6 @@ def test_comment_rendering( ) assert simple_line.render_with_comments(88) == expected_standalone_render - simple_line.comments = [standalone_comment, inline_comment] - expected_multiple_render = ( - simple_line.prefix - + normalized_comment - + "\n" - + simple_line.prefix - + normalized_comment - + "\n" - + str(simple_line) - ) - assert simple_line.render_with_comments(88) == expected_multiple_render - def test_long_comment_wrapping(simple_line: Line) -> None: last_node = simple_line.nodes[-1] diff --git a/tests/unit_tests/test_merger.py b/tests/unit_tests/test_merger.py index ceb522c7..61c0e198 100644 --- a/tests/unit_tests/test_merger.py +++ b/tests/unit_tests/test_merger.py @@ -21,10 +21,6 @@ def test_create_merged_line(merger: LineMerger) -> None: select able, baker, - /* a - multiline - comment - */ @@ -56,6 +52,26 @@ def test_create_merged_line(merger: LineMerger) -> None: _ = merger.create_merged_line(raw_query.lines[-6:-3]) +def test_create_merged_line_comments(merger: LineMerger) -> None: + + source_string = """ + select + able, + baker, + -- a comment + charlie, + """ + raw_query = merger.mode.dialect.initialize_analyzer( + merger.mode.line_length + ).parse_query(source_string) + + with pytest.raises(CannotMergeException) as exc_info: + # can't merge whitespace + _ = merger.create_merged_line(raw_query.lines) + + assert "comment" in str(exc_info.value) + + def test_basic_merge(merger: LineMerger) -> None: source_string = """ nullif( diff --git a/tests/unit_tests/test_splitter.py b/tests/unit_tests/test_splitter.py index 4ac29730..8a8ee446 100644 --- a/tests/unit_tests/test_splitter.py +++ b/tests/unit_tests/test_splitter.py @@ -78,13 +78,13 @@ def test_simple_comment_split( "-- not distinct, just an ordinary select here," " no big deal at all, it's okay really\n" ), - "-- here is a long comment to be wrapped above this line\n", "", + "-- here is a long comment to be wrapped above this line\n", "-- a short comment\n", + "-- standalone for third\n", "-- here is another long comment to be wrapped but not indented\n", "", "-- another comment that is a little bit too long to stay here\n", - "", "-- this should stay\n", "", "-- one last comment\n", @@ -407,3 +407,33 @@ def test_maybe_split_no_terminating_newline( ] assert actual_result == expected_result + + +def test_split_leading_comma_comment( + splitter: LineSplitter, default_analyzer: Analyzer +) -> None: + source_string = """ + select 1 + -- two + , 2 -- two inline + -- three + , 3 -- three inline + """.strip() + + raw_query = default_analyzer.parse_query(source_string) + assert len(raw_query.lines) == 3 + + split_lines: List[Line] = [] + for raw_line in raw_query.lines: + split_lines.extend(splitter.maybe_split(raw_line)) + + actual_result = [line.render_with_comments(88) for line in split_lines] + expected_result = [ + "select\n", + " 1\n", + " ,\n", + " -- two\n" " 2 -- two inline\n", + " ,\n", + " -- three\n" " 3 -- three inline\n", + ] + assert actual_result == expected_result