Skip to content

Commit

Permalink
fix #419: handle multiple c-style comments
Browse files Browse the repository at this point in the history
  • Loading branch information
tconbeer committed May 31, 2023
1 parent 6485747 commit 80b3563
Show file tree
Hide file tree
Showing 14 changed files with 131 additions and 30 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

- fixes a bug where multiple c-style comments (e.g., `/* comment */`) on a single line would cause sqlfmt
to not include all comments in formatted output ([#419](https://github.com/tconbeer/sqlfmt/issues/419) - thank you [@aersam](https://github.com/aersam)!)
- adds a safety check to ensure comments are preserved in formatted output

## [0.18.2] - 2023-05-31

- fixes a bug where specifying both relative and absolute paths would cause sqlfmt to crash ([#426](https://github.com/tconbeer/sqlfmt/issues/426) - thank you for the issue and fix, [@smcgivern](https://github.com/smcgivern)!)
Expand Down
6 changes: 5 additions & 1 deletion src/sqlfmt/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ def add_comment_to_buffer(
"""
token = Token.from_match(source_string, match, TokenType.COMMENT)
is_standalone = (not bool(analyzer.node_buffer)) or "\n" in token.token
comment = Comment(token=token, is_standalone=is_standalone)
comment = Comment(
token=token,
is_standalone=is_standalone,
previous_node=analyzer.previous_node,
)
analyzer.comment_buffer.append(comment)
analyzer.pos = token.epos

Expand Down
37 changes: 37 additions & 0 deletions src/sqlfmt/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,3 +361,40 @@ def _perform_safety_check(analyzer: Analyzer, raw_query: Query, result: str) ->
f"token at position {mismatch_pos}: raw: {mismatch_raw}; "
f"result: {mismatch_res}."
)

raw_comments = [
comment.token.token for line in raw_query.lines for comment in line.comments
]
result_comments = [
comment.token.token for line in result_query.lines for comment in line.comments
]
stripped_raw = "".join(
["".join(c.replace("--", "").replace("#", "").split()) for c in raw_comments]
)
stripped_res = "".join(
["".join(c.replace("--", "").replace("#", "").split()) for c in result_comments]
)
try:
assert stripped_raw == stripped_res
except AssertionError:
raw_len = len(stripped_raw)
result_len = len(stripped_res)
mismatch_pos = 0
mismatch_raw = ""
mismatch_res = ""

for i, (raw, res) in enumerate(zip_longest(stripped_raw, stripped_res)):
if raw is not res:
mismatch_pos = i
mismatch_raw = stripped_raw[i : i + 25]
mismatch_res = stripped_res[i : i + 25]
break

raise SqlfmtEquivalenceError(
"There was a problem formatting your query that "
"caused the safety check to fail. Please open an "
f"issue. Raw query had {raw_len} comment characters; formatted "
f"query had {result_len} comment characters. First mismatching "
f"character at position {mismatch_pos}: raw: {mismatch_raw}; "
f"result: {mismatch_res}."
)
13 changes: 9 additions & 4 deletions src/sqlfmt/comment.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import re
from dataclasses import dataclass
from typing import ClassVar, Iterator, Tuple
from typing import ClassVar, Iterator, Optional, Tuple

from sqlfmt.node import Node
from sqlfmt.token import Token


Expand All @@ -14,6 +15,7 @@ class Comment:

token: Token
is_standalone: bool
previous_node: Optional[Node]
comment_marker: ClassVar[re.Pattern] = re.compile(r"(--|#|/\*|\{#-?)([^\S\n]*)")

def __str__(self) -> str:
Expand Down Expand Up @@ -67,16 +69,19 @@ def is_multiline(self) -> bool:
"""
return "\n" in self.token.token

@property
def is_c_style(self) -> bool:
return self.token.token.startswith("/*")

@property
def is_inline(self) -> bool:
return not self.is_standalone and not self.is_multiline
return not self.is_standalone and not self.is_multiline and not self.is_c_style

def render_inline(self) -> str:
"""
Renders a comment as an inline comment, assuming it'll fit.
"""
inline_prefix = " " * 2
return f"{inline_prefix}{self}"
return f"{self}"

def render_standalone(self, max_length: int, prefix: str) -> str:
"""
Expand Down
9 changes: 4 additions & 5 deletions src/sqlfmt/line.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,17 @@ def render_with_comments(self, max_length: int) -> str:
"""
content = str(self).rstrip()
rendered_lines: List[str] = []
inline_comments: List[str] = []
for comment in self.comments:
if comment.is_multiline or comment.is_standalone:
rendered_lines.append(
comment.render_standalone(max_length=max_length, prefix=self.prefix)
)
else:
# comment is potentially an inline comment, and we'll handle
# that below. Inline comments must be the last comment
pass
inline_comments.append(comment.render_inline())

if self.has_inline_comment:
rendered_lines.append(f"{content}{self.comments[-1].render_inline()}")
if inline_comments:
rendered_lines.append(f"{content} {' '.join(inline_comments)}")
else:
rendered_lines.append(f"{self}")

Expand Down
9 changes: 8 additions & 1 deletion src/sqlfmt/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,14 @@ def _extract_components(
raise CannotMergeException(
"Can't merge lines with inline comments and other comments"
)
elif len(line.comments) == 1 and line.has_inline_comment:
elif (
len(line.comments) == 1
and len(line.nodes) > 1
and (
line.comments[0].is_inline
or line.comments[0].previous_node == line.nodes[-2]
)
):
# this is a comment that must be rendered inline,
# so it'll probably block merging unless the
# next line is just a comma
Expand Down
17 changes: 14 additions & 3 deletions src/sqlfmt/splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,21 @@ def split_at_index(
# the only thing after the split is an operator + \n, so keep the
# comments with the stuff before the operator
head_comments, tail_comments = comments, []
elif comments[-1].is_inline:
head_comments, tail_comments = comments[:-1], [comments[-1]]
else:
head_comments, tail_comments = comments, []
head_comments, tail_comments = [], []
for comment in comments:
if (
comment.is_standalone
or comment.is_multiline
or comment.previous_node is None
):
head_comments.append(comment)
elif comment.is_inline:
tail_comments.append(comment)
elif comment.is_c_style and comment.previous_node in new_nodes:
head_comments.append(comment)
else:
tail_comments.append(comment)

else: # no comments
head_comments, tail_comments = [], []
Expand Down
4 changes: 2 additions & 2 deletions tests/data/unformatted/101_multiline.sql
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,5 @@ with
# that we will also handle.
#}
select *
from renamed
where true /* what!?! */
from renamed /* what!?! */
where true
15 changes: 15 additions & 0 deletions tests/data/unformatted/218_multiple_c_comments.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
SELECT t.customer_sk AS `Customer_IdDWH`, t.customer_id AS `Customer_IdOrig`, t.__identity AS `CustomerCreditLimit_Agg_IdDWH`, t.kreditlimit_intern /* not existing, proposal: credit_limit_intern (92%) */ AS `KreditlimitIntern`, t.kreditlimit_versichert /* not existing */ AS `KreditlimitVersichert`, t.valid_from /* not existing */ AS `ValidFrom`, t.valid_to /* not existing */ AS `ValidTo` FROM `gold`.`dim_customer_credit_limit_agg` AS t
)))))__SQLFMT_OUTPUT__(((((
select
t.customer_sk as `Customer_IdDWH`,
t.customer_id as `Customer_IdOrig`,
t.__identity as `CustomerCreditLimit_Agg_IdDWH`,
t.kreditlimit_intern /* not existing, proposal: credit_limit_intern (92%) */
as `KreditlimitIntern`,
t.kreditlimit_versichert /* not existing */
as `KreditlimitVersichert`,
t.valid_from /* not existing */
as `ValidFrom`,
t.valid_to /* not existing */
as `ValidTo`
from `gold`.`dim_customer_credit_limit_agg` as t
1 change: 1 addition & 0 deletions tests/functional_tests/test_general_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
"unformatted/215_gitlab_get_backup_table_command.sql",
"unformatted/216_gitlab_zuora_revenue_revenue_contract_line_source.sql",
"unformatted/217_dbt_unit_testing_csv.sql",
"unformatted/218_multiple_c_comments.sql",
"unformatted/300_jinjafmt.sql",
"unformatted/400_create_fn_and_select.sql",
"unformatted/401_explain_select.sql",
Expand Down
4 changes: 4 additions & 0 deletions tests/unit_tests/test_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def test_multiline_parsing(default_analyzer: Analyzer) -> None:
epos=310,
),
is_standalone=True,
previous_node=q.nodes[2],
),
Comment(
token=Token(
Expand All @@ -198,6 +199,7 @@ def test_multiline_parsing(default_analyzer: Analyzer) -> None:
epos=499,
),
is_standalone=True,
previous_node=q.nodes[17],
),
Comment(
token=Token(
Expand All @@ -211,6 +213,7 @@ def test_multiline_parsing(default_analyzer: Analyzer) -> None:
epos=837,
),
is_standalone=True,
previous_node=q.nodes[38],
),
Comment(
token=Token(
Expand All @@ -221,6 +224,7 @@ def test_multiline_parsing(default_analyzer: Analyzer) -> None:
epos=874,
),
is_standalone=False,
previous_node=q.nodes[42],
),
]

Expand Down
12 changes: 11 additions & 1 deletion tests/unit_tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,9 +352,19 @@ def test_perform_safety_check(default_mode: Mode) -> None:
"result: TokenType.NAME." in str(excinfo.value)
)

with pytest.raises(SqlfmtEquivalenceError) as excinfo:
# adds a comment
_perform_safety_check(
analyzer, raw_query, "select\n-- new comment\n 1, 2, 3\n"
)

assert (
"Raw query had 0 comment characters; formatted query had 10 comment characters"
in str(excinfo.value)
)

# does not raise
_perform_safety_check(analyzer, raw_query, "select\n 1, 2, 3\n")
_perform_safety_check(analyzer, raw_query, "select\n-- new comment\n 1, 2, 3\n")


@pytest.mark.parametrize(
Expand Down
18 changes: 9 additions & 9 deletions tests/unit_tests/test_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def short_comment() -> Comment:
t = Token(
type=TokenType.COMMENT, prefix=" ", token="-- short comment", spos=0, epos=16
)
comment = Comment(t, is_standalone=False)
comment = Comment(t, is_standalone=False, previous_node=None)
return comment


Expand All @@ -20,7 +20,7 @@ def short_mysql_comment() -> Comment:
t = Token(
type=TokenType.COMMENT, prefix=" ", token="# short comment", spos=0, epos=15
)
comment = Comment(t, is_standalone=False)
comment = Comment(t, is_standalone=False, previous_node=None)
return comment


Expand All @@ -29,7 +29,7 @@ def nospace_comment() -> Comment:
t = Token(
type=TokenType.COMMENT, prefix=" ", token="--short comment", spos=0, epos=15
)
comment = Comment(t, is_standalone=False)
comment = Comment(t, is_standalone=False, previous_node=None)
return comment


Expand All @@ -38,7 +38,7 @@ def standalone_comment() -> Comment:
t = Token(
type=TokenType.COMMENT, prefix=" ", token="-- short comment", spos=0, epos=16
)
comment = Comment(t, is_standalone=True)
comment = Comment(t, is_standalone=True, previous_node=None)
return comment


Expand All @@ -47,7 +47,7 @@ def multiline_comment() -> Comment:
t = Token(
type=TokenType.COMMENT, prefix=" ", token="/*\ncomment\n*/", spos=0, epos=15
)
comment = Comment(t, is_standalone=True)
comment = Comment(t, is_standalone=True, previous_node=None)
return comment


Expand Down Expand Up @@ -82,7 +82,7 @@ def test_str_len(
def test_render_inline(
short_comment: Comment, nospace_comment: Comment, standalone_comment: Comment
) -> None:
expected = " -- short comment\n"
expected = "-- short comment\n"
assert short_comment.render_inline() == expected
assert nospace_comment.render_inline() == expected

Expand All @@ -107,7 +107,7 @@ def test_render_standalone(short_comment: Comment, prefix: str) -> None:
def test_render_standalone_wrap_strip_whitespace() -> None:
txt = "-- foo" + " " * 100 + "bar"
t = Token(type=TokenType.COMMENT, prefix="", token=txt, spos=0, epos=len(txt))
comment = Comment(t, is_standalone=True)
comment = Comment(t, is_standalone=True, previous_node=None)
assert comment.render_standalone(max_length=88, prefix="") == "-- foo\n-- bar\n"


Expand Down Expand Up @@ -135,7 +135,7 @@ def test_split_before(text: str, expected_splits: List[str]) -> None:

def test_empty_comment() -> None:
t = Token(type=TokenType.COMMENT, prefix=" ", token="-- ", spos=0, epos=3)
comment = Comment(t, is_standalone=True)
comment = Comment(t, is_standalone=True, previous_node=None)
assert str(comment) == "--\n"


Expand All @@ -156,7 +156,7 @@ def test_no_wrap_long_jinja_comments() -> None:
spos=0,
epos=len(comment_str),
)
comment = Comment(t, is_standalone=True)
comment = Comment(t, is_standalone=True, previous_node=None)
rendered = comment.render_standalone(88, "")

assert rendered == comment_str + "\n"
12 changes: 8 additions & 4 deletions tests/unit_tests/test_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,18 @@ def test_comment_rendering(
epos=last_node.token.epos + 13,
)

inline_comment = Comment(token=comment_token, is_standalone=False)
inline_comment = Comment(
token=comment_token, is_standalone=False, previous_node=last_node
)
simple_line.comments = [inline_comment]
expected_inline_render = (
str(simple_line).rstrip() + " " + normalized_comment + "\n"
)
assert simple_line.render_with_comments(88) == expected_inline_render

standalone_comment = Comment(token=comment_token, is_standalone=True)
standalone_comment = Comment(
token=comment_token, is_standalone=True, previous_node=None
)
simple_line.comments = [standalone_comment]
expected_standalone_render = (
simple_line.prefix + normalized_comment + "\n" + str(simple_line)
Expand All @@ -234,7 +238,7 @@ def test_long_comment_wrapping(simple_line: Line) -> None:
spos=last_node.token.epos,
epos=last_node.token.epos + 13,
)
comment = Comment(token=comment_token, is_standalone=True)
comment = Comment(token=comment_token, is_standalone=True, previous_node=None)
simple_line.comments = [comment]
expected_render = (
"-- asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf "
Expand Down Expand Up @@ -264,7 +268,7 @@ def test_long_comments_that_are_not_wrapped(
spos=last_node.token.epos,
epos=last_node.token.epos + 13,
)
comment = Comment(token=comment_token, is_standalone=True)
comment = Comment(token=comment_token, is_standalone=True, previous_node=None)
simple_line.comments = [comment]
expected_render = raw_comment + "\nwith abc as (select * from my_table)\n"
assert simple_line.render_with_comments(88) == expected_render
Expand Down

0 comments on commit 80b3563

Please sign in to comment.