Skip to content

Commit

Permalink
fix/#639/support databricks query hints (#647)
Browse files Browse the repository at this point in the history
* fix: support databricks type hint comments

* chore: update changelog

* fix: add unit test for coverage
  • Loading branch information
tconbeer authored Nov 22, 2024
1 parent d37dfbe commit dca12e6
Show file tree
Hide file tree
Showing 8 changed files with 169 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.

- sqlfmt no longer adds a space between the function name and parens for `filter()` and `isnull()` (but it also permits `filter ()` and `isnull ()` to support dialects where those are operators, not function names) ([#641](https://github.com/tconbeer/sqlfmt/issues/641) - thank you [@williamscs](https://github.com/williamscs) and [@hongtron](https://github.com/hongtron)!).
- sqlfmt now supports Spark type-hinted numeric literals like `32y` and `+3.2e6bd` and will not introduce a space between the digits and their type suffix ([#640](https://github.com/tconbeer/sqlfmt/issues/640) - thank you [@ShaneMazur](https://github.com/ShaneMazur)!).
- sqlfmt now supports Databricks query hint comments like `/*+ COALESCE(3) */` ([#639](https://github.com/tconbeer/sqlfmt/issues/639) - thank you [@wr-atlas](https://github.com/wr-atlas)!).

## [0.23.3] - 2024-11-12

Expand Down
14 changes: 11 additions & 3 deletions src/sqlfmt/comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,18 @@ def __str__(self) -> str:
without preceding whitespace, with a single space between the marker
and the comment text.
"""
if self.is_multiline or self.formatting_disabled:
return f"{self.token.token}"
if (
self.is_multiline
or self.formatting_disabled
or self.is_databricks_query_hint
):
return self.token.token
else:
marker, comment_text = self._comment_parts()
if comment_text:
return f"{marker} {comment_text}"
else:
return f"{marker}"
return marker

def __len__(self) -> int:
return len(str(self))
Expand Down Expand Up @@ -85,6 +89,10 @@ def is_multiline(self) -> bool:
def is_c_style(self) -> bool:
return self.token.token.startswith("/*")

@property
def is_databricks_query_hint(self) -> bool:
return self.token.token.startswith("/*+")

@property
def is_inline(self) -> bool:
return not self.is_standalone and not self.is_multiline and not self.is_c_style
Expand Down
6 changes: 6 additions & 0 deletions src/sqlfmt/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ def _extract_components(
raise CannotMergeException(
"Can't merge lines with inline comments and other comments"
)
elif any(
[comment.is_databricks_query_hint for comment in line.comments]
):
raise CannotMergeException(
"Can't merge lines with a databricks type hint comment"
)
elif (
len(line.comments) == 1
and len(line.nodes) > 1
Expand Down
102 changes: 102 additions & 0 deletions tests/data/unformatted/134_databricks_type_hints.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
-- see: https://github.com/tconbeer/sqlfmt/issues/639
-- source: https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-hints.html
SELECT /*+ COALESCE(3) */ * FROM t;
SELECT /*+ REPARTITION(3) */ * FROM t;
SELECT /*+ REPARTITION(c) */ * FROM t;
SELECT /*+ REPARTITION(3, c) */ * FROM t;
SELECT /*+ REPARTITION_BY_RANGE(c) */ * FROM t;
SELECT /*+ REPARTITION_BY_RANGE(3, c) */ * FROM t;
SELECT /*+ BROADCAST(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ BROADCASTJOIN (t1) */ * FROM t1 left JOIN t2 ON t1.key = t2.key;
SELECT /*+ MAPJOIN(t2) */ * FROM t1 right JOIN t2 ON t1.key = t2.key;

-- Join Hints for shuffle sort merge join
SELECT /*+ SHUFFLE_MERGE(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ MERGEJOIN(t2) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ MERGE(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;

-- Join Hints for shuffle hash join
SELECT /*+ SHUFFLE_HASH(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;

-- Join Hints for shuffle-and-replicate nested loop join
SELECT /*+ SHUFFLE_REPLICATE_NL(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ BROADCAST(t1), MERGE(t1, t2) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
)))))__SQLFMT_OUTPUT__(((((
-- see: https://github.com/tconbeer/sqlfmt/issues/639
-- source:
-- https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-hints.html
select /*+ COALESCE(3) */
*
from t
;
select /*+ REPARTITION(3) */
*
from t
;
select /*+ REPARTITION(c) */
*
from t
;
select /*+ REPARTITION(3, c) */
*
from t
;
select /*+ REPARTITION_BY_RANGE(c) */
*
from t
;
select /*+ REPARTITION_BY_RANGE(3, c) */
*
from t
;
select /*+ BROADCAST(t1) */
*
from t1
inner join t2 on t1.key = t2.key
;
select /*+ BROADCASTJOIN (t1) */
*
from t1
left join t2 on t1.key = t2.key
;
select /*+ MAPJOIN(t2) */
*
from t1
right join t2 on t1.key = t2.key
;

-- Join Hints for shuffle sort merge join
select /*+ SHUFFLE_MERGE(t1) */
*
from t1
inner join t2 on t1.key = t2.key
;
select /*+ MERGEJOIN(t2) */
*
from t1
inner join t2 on t1.key = t2.key
;
select /*+ MERGE(t1) */
*
from t1
inner join t2 on t1.key = t2.key
;

-- Join Hints for shuffle hash join
select /*+ SHUFFLE_HASH(t1) */
*
from t1
inner join t2 on t1.key = t2.key
;

-- Join Hints for shuffle-and-replicate nested loop join
select /*+ SHUFFLE_REPLICATE_NL(t1) */
*
from t1
inner join t2 on t1.key = t2.key
;
select /*+ BROADCAST(t1), MERGE(t1, t2) */
*
from t1
inner join t2 on t1.key = t2.key
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
select /*+ my query hint */
1
)))))__SQLFMT_OUTPUT__(((((
select
1
1 change: 1 addition & 0 deletions tests/functional_tests/test_general_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"unformatted/131_assignment_statement.sql",
"unformatted/132_spark_number_literals.sql",
"unformatted/133_for_else.sql",
"unformatted/134_databricks_type_hints.sql",
"unformatted/200_base_model.sql",
"unformatted/201_basic_snapshot.sql",
"unformatted/202_unpivot_macro.sql",
Expand Down
31 changes: 31 additions & 0 deletions tests/unit_tests/test_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest
from sqlfmt.comment import Comment
from sqlfmt.node import Node
from sqlfmt.node_manager import NodeManager
from sqlfmt.token import Token, TokenType

Expand Down Expand Up @@ -60,6 +61,30 @@ def multiline_comment() -> Comment:
return comment


@pytest.fixture
def datbricks_query_hint_comment() -> Comment:
n = Node(
Token(
type=TokenType.UNTERM_KEYWORD,
prefix="",
token="select",
spos=0,
epos=6,
),
previous_node=None,
prefix="",
value="select",
open_brackets=[],
open_jinja_blocks=[],
formatting_disabled=[],
)
t = Token(
type=TokenType.COMMENT, prefix=" ", token="/*+ hint here */", spos=6, epos=23
)
comment = Comment(t, is_standalone=False, previous_node=n)
return comment


@pytest.fixture
def fmt_disabled_comment() -> Comment:
t = Token(type=TokenType.FMT_OFF, prefix="", token="--fmt: off", spos=0, epos=10)
Expand All @@ -81,11 +106,13 @@ def test_get_marker(
short_mysql_comment: Comment,
nospace_comment: Comment,
short_js_comment: Comment,
datbricks_query_hint_comment: Comment,
) -> None:
assert short_comment._get_marker() == ("--", 3)
assert short_mysql_comment._get_marker() == ("#", 2)
assert short_js_comment._get_marker() == ("//", 3)
assert nospace_comment._get_marker() == ("--", 2)
assert datbricks_query_hint_comment._get_marker() == ("/*", 2)


def test_comment_parts(
Expand Down Expand Up @@ -210,3 +237,7 @@ def test_no_wrap_long_jinja_comments() -> None:
rendered = comment.render_standalone(88, "")

assert rendered == comment_str + "\n"


def test_no_add_space_databricks_hint(datbricks_query_hint_comment: Comment) -> None:
assert str(datbricks_query_hint_comment) == datbricks_query_hint_comment.token.token
12 changes: 12 additions & 0 deletions tests/unit_tests/test_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,3 +560,15 @@ def test_do_not_merge_operator_sequences_across_commas(merger: LineMerger) -> No
merged_lines = merger.maybe_merge_lines(raw_query.lines)
result_string = "".join([str(line) for line in merged_lines])
assert result_string == expected_string


def test_do_not_merge_databricks_query_hints(merger: LineMerger) -> None:
source_string, expected_string = read_test_data(
"unit_tests/test_merger/test_no_merge_databricks_query_hints.sql"
)
raw_query = merger.mode.dialect.initialize_analyzer(
merger.mode.line_length
).parse_query(source_string)
merged_lines = merger.maybe_merge_lines(raw_query.lines)
result_string = "".join([str(line) for line in merged_lines])
assert result_string == expected_string

0 comments on commit dca12e6

Please sign in to comment.